In [2]:
import json

# Read the credentials file without ever echoing its contents into the notebook output
with open('credentials.json') as f:
    data = json.load(f)

# Keep only the entry stored under the 'mongodb' key
secret_key = data['mongodb']

# Reporting just the length confirms the value was loaded while revealing none of it.
print(f"My secret key is {len(secret_key)} characters in length.")

My secret key is 68 characters in length.


In [4]:
!pip install pymongo

Collecting pymongo
  Downloading pymongo-4.7.1-cp310-cp310-macosx_10_9_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.6.1-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.7.1-cp310-cp310-macosx_10_9_x86_64.whl (486 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.1/486.1 kB[0m [31m697.0 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading dnspython-2.6.1-py3-none-any.whl (307 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h[33mDEPRECATION: pytorch-lightning 1.8.3.post0 has a non-standard dependency specifier torch>=1.9.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/

In [3]:
import pymongo
import certifi

# Open the connection, validating the server's TLS certificate against certifi's CA bundle
client = pymongo.MongoClient(
    secret_key,
    tlsCAFile=certifi.where(),
)
print(f"Using MongoDB version {client.server_info()['version']}.")

# List every database visible on this server
all_databases = client.list_database_names()
print(f"This MongoDB server has the databases {all_databases}")

# Select the database we want to work with by name
data320 = client['data320']

# List the collections stored inside that database
all_collections = data320.list_collection_names()
print(f"This database has the collections {all_collections}")

Using MongoDB version 7.0.8.
This MongoDB server has the databases ['data320', 'sample_mflix', 'admin', 'local']
This database has the collections ['imdb', 'movies']


In [4]:
import pandas as pd
import re

# Fetch the documents whose release_date matches the regex "2010"
# (this can still be a large amount of data!)
cursor = data320["imdb"].find(
    {"release_date": re.compile("2010")}
)

# Materialise the cursor into a Pandas dataframe
imdb = pd.DataFrame(cursor)

# Preview the first rows to confirm the data was read correctly
imdb.head(5)


Unnamed: 0,_id,id,title,runtime,user_rating,votes,mpaa_rating,release_date,budget,opening_weekend,gross_sales,genres,cast,director,producer,company
0,662870e0c5e83e4c458c5c1a,1196141,Diary of a Wimpy Kid,94,6.2,49261,TV-PG::(V),3/18/2010,15000000.0,22126166.0,75700500.0,"Comedy, Drama, Family","Zachary Gordon, Robert Capron, Rachael Harris,...",Thor Freudenthal,"Nina Jacobson, Jeff Kinney, Brad Simpson, Etha...","Color Force, Dayday Films, Dune Entertainment III"
1,662870e0c5e83e4c458c5c1b,1375666,Inception,148,8.8,2282266,"TV-14::(DLV, TV Rating.)",7/8/2010,160000000.0,62785337.0,825532800.0,"Action, Adventure, Sci-Fi, Thriller","Leonardo DiCaprio, Joseph Gordon-Levitt, Ellio...",Christopher Nolan,"Zakaria Alaoui, John Bernard, Chris Brigham, J...","Warner Bros., Legendary Entertainment, Syncopy"
2,662870e0c5e83e4c458c5c20,435761,Toy Story 3,103,8.3,818143,G,6/12/2010,190000000.0,110307189.0,1068880000.0,"Animation, Adventure, Comedy, Family, Fantasy","Tom Hanks, Tim Allen, Joan Cusack, Ned Beatty,...",Lee Unkrich,"Darla K. Anderson, John Lasseter, Nicole Parad...","Walt Disney Pictures, Pixar Animation Studios"
3,662870e0c5e83e4c458c5c21,926084,Harry Potter and the Deathly Hallows: Part 1,146,7.7,534619,PG-13,11/11/2010,150000000.0,,960283300.0,"Adventure, Family, Fantasy, Mystery","Bill Nighy, Emma Watson, Richard Griffiths, Ha...",David Yates,"David Barron, David Heyman, Tim Lewis, J.K. Ro...","Warner Bros., Heyday Films"
4,662870e0c5e83e4c458c5c22,1285016,The Social Network,120,7.8,686673,TV-14,9/24/2010,40000000.0,22445653.0,224920300.0,"Biography, Drama","Jesse Eisenberg, Rooney Mara, Bryan Barter, Du...",David Fincher,"Dana Brunetti, Ceán Chaffin, Jim Davidson, Mic...","Columbia Pictures, Relativity Media, Scott Rud..."


In [5]:
# Convert release dates to datetime; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
# Bracket indexing is used for the assignments: attribute-style assignment
# (imdb.col = ...) silently sets a plain attribute when `col` is not an existing column.
imdb['release_date'] = pd.to_datetime(imdb['release_date'], errors='coerce')
# Convert budget and runtime to numeric type; unparseable values become NaN.
imdb['budget'] = pd.to_numeric(imdb['budget'], errors='coerce')
imdb['runtime'] = pd.to_numeric(imdb['runtime'], errors='coerce')

In [16]:
# Five smallest budgets: ascending sort on budget, then take the top rows
imdb.sort_values('budget', ascending=True).head(5)

Unnamed: 0,_id,id,title,runtime,user_rating,votes,mpaa_rating,release_date,budget,opening_weekend,gross_sales,genres,cast,director,producer,company
531,662870e0c5e83e4c458c5e31,1341341,Ceremony,89,5.4,4300,R,2010-09-13,3.0,6920.0,,"Comedy, Romance","Michael Angarano, Uma Thurman, Reece Thompson,...",Max Winkler,"Kathryn Dean, Emilio Diez Barroso, Daniel Dubi...","NALA Films, Polymorphic Pictures"
538,662870e0c5e83e4c458c5e38,1496729,Aaranya Kaandam,126,8.5,4748,Not Rated,2010-10-30,5.0,,70000000.0,"Action, Crime, Thriller","Sampath Raj, Jackie Shroff, Ravi Krishna, Yasm...",Thiagarajan Kumararaja,S.P.B. Charan,Capital Film Works
209,662870e0c5e83e4c458c5cef,1813757,Who Killed Captain Alex?,64,7.9,9172,Not Rated,2010-06-22,200.0,,,"Action, Crime, Mystery, War","Kakule William, Sserunya Ernest, G. Puffs, Kav...",Nabwana I.G.G.,"Alan Hofmanis, Nabwana I.G.G.",Ramon Film Productions
856,662870e0c5e83e4c458c5f78,1757940,Çakal,86,6.6,1590,,2010-10-09,700.0,,,Crime,"Ismail Hacioglu, Ugur Polat, Erkan Can, Çetin ...",Erhan Kozan,"Kaan Korkmaz, Zeynep askin Korkmaz, Mehmet Çelebi",", , , ,"
978,662870e0c5e83e4c458c5ff3,1717229,The Unforgiving,75,4.0,1843,,2010-08-20,5000.0,,,"Horror, Thriller","Ryan Macquet, Claire Opperman, Michael Thompso...",Alastair Orr,"Lorika Boshoff, Ryan Macquet, Alastair Orr","Illusionz Unlimited Entertainment, Kamakazi Pr..."


In [20]:
# Five largest budgets: descending sort on budget, then take the top rows
imdb.sort_values('budget', ascending=False).head(5)

Unnamed: 0,_id,id,title,runtime,user_rating,votes,mpaa_rating,release_date,budget,opening_weekend,gross_sales,genres,cast,director,producer,company
516,662870e0c5e83e4c458c5e22,1825955,Midnight FM,106,6.5,2671,Not Rated,2010-10-14,7000000000.0,,,"Action, Crime, Thriller","Soo Ae, Yoo Ji-Tae, Ma Dong-seok, Joon-Ha Lee,...",Sang Man Kim,"Geon-yong Choi, Ki-seop Choi, Jung-hoon Je, Ho...","Weekend Cinema, Hong Film"
301,662870e0c5e83e4c458c5d4b,1305797,Enthiran,155,7.1,27354,Not Rated,2010-09-30,1900000000.0,,54017644.0,"Action, Sci-Fi, Thriller","Rajinikanth, Aishwarya Rai Bachchan, Danny Den...",S. Shankar,"Justin Bird, Bruno Canale, Kalanidhi Maran, Ja...","Sun Pictures, Utopia Films"
284,662870e0c5e83e4c458c5d3a,1287878,Poetry,139,7.8,12135,Unrated,2010-05-13,1300000000.0,,,Drama,"Yun Jeong-hie, Lee Da-wit, Kim Hee-ra, Ahn Nae...",Lee Chang-dong,"Sung-min Choi, Lee Dong-ha, Myung-soo Jung, Jo...","UniKorea Pictures, Pine House Film, Diaphana F..."
685,662870e0c5e83e4c458c5ecc,1421036,Goa,164,6.6,1863,Not Rated,2010-01-29,900000000.0,,,Comedy,"Jai, Melanie Marie, Piaa Bajpai, Premgi Amaren...",Venkat Prabhu,"G. Kartik, Ravi Kumar, Harish Ram L.H., Sounda...",Ocher Studios
335,662870e0c5e83e4c458c5d6d,1198101,Kites,123,6.0,13138,Not Rated,2010-05-21,600000000.0,,,"Action, Drama, Romance, Thriller","Hrithik Roshan, Bárbara Mori, Nicholas Brown, ...",Anurag Basu,"Gilley Grey, Gayatri Gulati, James Ordonez, Ra...","Bollywood Hollywood Production, Film Kraft, Re..."


In [23]:
# Shortest movies: ascending sort on runtime, showing the first 20 rows
imdb.sort_values('runtime').head(20)

Unnamed: 0,_id,id,title,runtime,user_rating,votes,mpaa_rating,release_date,budget,opening_weekend,gross_sales,genres,cast,director,producer,company
727,662870e0c5e83e4c458c5ef6,1433813,Hubble,45,7.6,4380,G,2010-03-19,,410920.0,73209206.0,Documentary,"Leonardo DiCaprio, Scott D. Altman, Andrew J. ...",Toni Myers,"Judy Carroll, Graeme Ferguson, Toni Myers","IMAX Space Ltd., Warner Bros."
988,662870e0c5e83e4c458c5ffd,1828232,Life Cycles,47,8.3,1270,Not Rated,2010-09-21,,,,"Documentary, Sport","Graham Agassiz, Mike Hopkins, Matt Hunter, Cam...","Derek Frankowski, Ryan Gibb",", , , ,",Stance Films
637,662870e0c5e83e4c458c5e9b,1587157,Yu-Gi-Oh! Bonds Beyond Time,49,6.4,1845,Not Rated,2010-01-23,,,,"Animation, Action, Adventure, Comedy, Fantasy,...","Gregory Abbey, Carson Laidlaw, Matthew Labyort...","Martin Billany, Ken'ichi Takeshita","Teruaki Jitsumatsu, Yukio Kawasaki, Yôko Matsu...","Nihon Ad Systems, TV Tokyo"
1028,662870e0c5e83e4c458c602f,1665011,The Lives of Mount Druitt Youth,60,4.8,1529,,2010-08-10,,,,"Documentary, Drama","Drew Pearson, Saad Adam, Evan Adam, Simon Bodi...",Saad Adam,Saad Adam,Question Mark Films
961,662870e0c5e83e4c458c5fe2,1797346,1915 Armenian Genocide,60,4.0,2012,Not Rated,2010-10-22,,,,"Documentary, History",Paul Schaefer,Mark Bedrosian,"Mark Bedrosian, Curtis Jones, Val Mijailovic, ...",", , , ,"
439,662870e0c5e83e4c458c5dd5,1583356,Rammbock: Berlin Undead,63,6.3,5664,R,2010-01-19,,,,"Action, Drama, Horror, Sci-Fi, Thriller","Michael Fuith, Theo Trebs, Anka Graczyk, Emily...",Marvin Kren,"Melanie Berke, Katharina Dufner, Sigrid Hoerner","Zweites Deutsches Fernsehen (ZDF), Das Kleine ..."
209,662870e0c5e83e4c458c5cef,1813757,Who Killed Captain Alex?,64,7.9,9172,Not Rated,2010-06-22,200.0,,,"Action, Crime, Mystery, War","Kakule William, Sserunya Ernest, G. Puffs, Kav...",Nabwana I.G.G.,"Alan Hofmanis, Nabwana I.G.G.",Ramon Film Productions
677,662870e0c5e83e4c458c5ec4,1754177,Mardock Scramble: The First Compression,69,6.4,1482,TV-MA,2010-11-06,,,,"Animation, Action, Drama, Sci-Fi, Thriller","Chris Ayres, Marcy Bannor, Luci Christian, Jus...",Susumu Kudo,"David Del Rio, John Ledford, Eiichi Takahashi","Aniplex, Brosta TV, GoHands, King Records, Sen..."
330,662870e0c5e83e4c458c5d68,1546036,The Drawn Together Movie!,70,6.1,4324,TV-MA::(LSV),2010-03-18,,,,"Animation, Comedy, Mystery","Adam Carolla, Carol Channing, Christine Eberso...",Greg Franklin,"Brendan Burch, Andy Fiedler, Dave Jeser, Richa...","Comedy Central Films, Comedy Central, Six Poin..."
436,662870e0c5e83e4c458c5dd2,1673702,A Cat in Paris,70,6.9,11243,PG,2010-10-15,,,2080634.0,"Animation, Adventure, Comedy, Crime, Family","Dominique Blanc, Bernadette Lafont, Bruno Salo...","Jean-Loup Felicioli, Alain Gagnol","Eric Beckman, Emmanuel Bernard, Annemie Degrys...","Folimage, Centre du Cinéma et de l'Audiovisuel..."


In [24]:
# Longest movies: descending sort on runtime, showing the first 10 rows
imdb.sort_values('runtime', ascending=False).head(10)

Unnamed: 0,_id,id,title,runtime,user_rating,votes,mpaa_rating,release_date,budget,opening_weekend,gross_sales,genres,cast,director,producer,company
477,662870e0c5e83e4c458c5dfb,1512888,Ayirathil Oruvan,183,8.0,6134,Not Rated,2010-01-14,320000000.0,,850000000.0,"Action, Adventure","Karthi, Reema Sen, Andrea Jeremiah, Parthiban ...",K. Selvaraghavan,R. Ravindran,"Dream Valley Corporation, Goldmines Telefilms"
796,662870e0c5e83e4c458c5f3b,403645,Burnt by the Sun 2,181,4.3,4316,,2010-04-17,45000000.0,,,"Drama, History, War","Nikita Mikhalkov, Oleg Menshikov, Nadezhda Mik...",Nikita Mikhalkov,"Aleksey Balashov, Sergei Gurevich, Aleksey Kar...","Three T Productions, Ministry of Culture of th..."
610,662870e0c5e83e4c458c5e80,1403047,Aurora,181,6.6,1943,Not Rated,2010-05-14,,,,Drama,"Cristi Puiu, Clara Voda, Valeria Seciu, Catrin...",Cristi Puiu,"Philippe Bober, Sarah Nagel, Bobby Paunescu, A...","Mandragora, Coproduction Office, Bord Cadre Fi..."
829,662870e0c5e83e4c458c5f5c,1646958,The Autobiography of Nicolae Ceausescu,180,7.5,1511,Not Rated,2010-05-18,,,,"Documentary, Biography, History","Nicolae Ceausescu, Elena Ceausescu, Stefan And...",Andrei Ujica,Velvet Moraru,"ICON production, Centrul National al Cinematog..."
66,662870e0c5e83e4c458c5c60,1754109,Les Misérables in Concert: The 25th Anniversary,178,8.8,5996,Not Rated,2010-10-03,,,,"Drama, Music, Musical","Alfie Boe, Norm Lewis, Matt Lucas, Jenny Gallo...","Nick Morris, Laurence Connor, James Powell","Sinead D'Arcy, Clare Donaldson, Cameron Mackin...","Cameron Mackintosh Ltd., Universal Pictures"
437,662870e0c5e83e4c458c5dd3,1582519,Khaleja,170,7.6,9160,Not Rated,2010-10-07,,,180800000.0,"Action, Comedy, Fantasy","Mahesh Babu, Anushka Shetty, Prakash Raj, Brah...",Trivikram Srinivas,"Sahni Bob, Kalyan C., C.V. Rao, Ramesh Singana...","A.N. Combines, S.H. Creations, Sri Kanakaratna..."
741,662870e0c5e83e4c458c5f04,1661031,Brindaavanam,169,7.1,4230,Not Rated,2010-10-14,,,300100000.0,"Action, Comedy, Drama, Musical, Romance","N.T. Rama Rao Jr., Kajal Aggarwal, Samantha Ru...",Vamshi Paidipally,"Lakshman, Dil Raju, Sireesh","Goldmines Telefilms, Sri Venkateswara Creations"
761,662870e0c5e83e4c458c5f18,1185412,Veer,169,4.6,8204,Not Rated,2010-01-22,,,,"Action, Adventure, Drama, Romance, War","Salman Khan, Mithun Chakraborty, Jackie Shroff...",Anil Sharma,"Pratik Galani, Vijay Galani, Kamlesh Kumar, Br...","Eros Worldwide, Vijay Galani Moviez"
784,662870e0c5e83e4c458c5f2f,1637691,Khelein Hum Jee Jaan Sey,168,5.8,1326,,2010-12-03,,,,"Action, Drama, History","Abhishek Bachchan, Deepika Padukone, Sikandar ...",Ashutosh Gowariker,"Ajay Bijli, Sanjeev K. Bijli, Dilip Borkar, La...","Ashutosh Gowariker Productions, UTV Motion Pic..."
795,662870e0c5e83e4c458c5f3a,1613040,Leader,167,8.0,4715,,2010-02-19,,,,Drama,"Rana Daggubati, Richa Langella, Priya Anand, J...",Sekhar Kammula,"Aparna Guhan, Aruna Guhan, Saravanan M., Guhan...",A.V.M. Productions


# Runtime:
  - <h4 style="color:#FAC898">Highest (183min): Ayirathil Oruvan</h4>
    <p>From examining the highest runtimes in the imdb database, I wouldn't qualify Ayirathil Oruvan as an outlier because it's within 10 minutes of the next 5 movies on the list. However, it does have the longest runtime in the database.</p>
  

  - <h4 style="color:#FAC898">Lowest (45min)  : Hubble</h4>
    <p>This movie had the lowest runtime, 45 minutes, which almost qualifies it as a "short film". According to <a href="https://www.paus.tv/lab/feature-film-vs-short-film-a-complete-guide-for-your-understanding#:~:text=A%20feature%20film's%20normal%20screen,the%20two%2C%20such%20as%20structure.">Paus TV</a>, a "short film" is 40 minutes or less.</p>


# Budget:
  - <h4 style="color:#FAC898">Highest (7B): Midnight FM</h4>
    <p>This seemed extremely high because I assumed the budget was in USD. However, it is a Korean movie and the budget is in South Korean won (KRW). Using the current exchange rate, that comes out to about $5.1M, which is actually low in comparison to typical Hollywood movies.</p>


  - <h4 style="color:#FAC898">Lowest ($3.2M): Ceremony </h4>
    <p>This was the lowest movie budget in the database. However, once currency exchange is taken into account, it is not very far from the highest budget found earlier.</p>
    <p style="color:#CC5500">
    The database currency should be unified for all movies to know the actual highest and lowest budgets.
    </p>

