In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Load the source data file and read and display it with Pandas

movie_data = Path("imdb_movies.csv")
movie_data_df = pd.read_csv(movie_data)
movie_data_df

Unnamed: 0,names,date_x,score,genre,overview,crew,orig_title,status,orig_lang,budget_x,revenue,country
0,Creed III,03/02/2023,73.0,"Drama, Action","After dominating the boxing world, Adonis Cree...","Michael B. Jordan, Adonis Creed, Tessa Thompso...",Creed III,Released,English,75000000.0,2.716167e+08,AU
1,Avatar: The Way of Water,12/15/2022,78.0,"Science Fiction, Adventure, Action",Set more than a decade after the events of the...,"Sam Worthington, Jake Sully, Zoe Saldaña, Neyt...",Avatar: The Way of Water,Released,English,460000000.0,2.316795e+09,AU
2,The Super Mario Bros. Movie,04/05/2023,76.0,"Animation, Adventure, Family, Fantasy, Comedy","While working underground to fix a water main,...","Chris Pratt, Mario (voice), Anya Taylor-Joy, P...",The Super Mario Bros. Movie,Released,English,100000000.0,7.244590e+08,AU
3,Mummies,01/05/2023,70.0,"Animation, Comedy, Family, Adventure, Fantasy","Through a series of unfortunate events, three ...","Óscar Barberán, Thut (voice), Ana Esther Albor...",Momias,Released,"Spanish, Castilian",12300000.0,3.420000e+07,AU
4,Supercell,03/17/2023,61.0,Action,Good-hearted teenager William always lived in ...,"Skeet Ulrich, Roy Cameron, Anne Heche, Dr Quin...",Supercell,Released,English,77000000.0,3.409420e+08,US
...,...,...,...,...,...,...,...,...,...,...,...,...
10173,20th Century Women,12/28/2016,73.0,Drama,"In 1979 Santa Barbara, California, Dorothea Fi...","Annette Bening, Dorothea Fields, Lucas Jade Zu...",20th Century Women,Released,English,7000000.0,9.353729e+06,US
10174,Delta Force 2: The Colombian Connection,08/24/1990,54.0,Action,When DEA agents are taken captive by a ruthles...,"Chuck Norris, Col. Scott McCoy, Billy Drago, R...",Delta Force 2: The Colombian Connection,Released,English,9145817.8,6.698361e+06,US
10175,The Russia House,12/21/1990,61.0,"Drama, Thriller, Romance","Barley Scott Blair, a Lisbon-based editor of R...","Sean Connery, Bartholomew 'Barley' Scott Blair...",The Russia House,Released,English,21800000.0,2.299799e+07,US
10176,Darkman II: The Return of Durant,07/11/1995,55.0,"Action, Adventure, Science Fiction, Thriller, ...",Darkman and Durant return and they hate each o...,"Larry Drake, Robert G. Durant, Arnold Vosloo, ...",Darkman II: The Return of Durant,Released,English,116000000.0,4.756613e+08,US


In [3]:
#Edit down the columns to the ones we need
edited_movies_df = movie_data_df[["names", "date_x", "score", "genre", "orig_lang", "budget_x", "revenue"]]

#And rename them to be more user friendly

edited_movies_df = edited_movies_df.rename(columns={"date_x":"Date",
                                                        "budget_x":"Budget",
                                                        "names":"Name",
                                                        "score":"Score",
                                                        "genre":"Genre",
                                                        "revenue":"Revenue",
                                                        "orig_lang":"Language"  
                                                       })
edited_movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue
0,Creed III,03/02/2023,73.0,"Drama, Action",English,75000000.0,2.716167e+08
1,Avatar: The Way of Water,12/15/2022,78.0,"Science Fiction, Adventure, Action",English,460000000.0,2.316795e+09
2,The Super Mario Bros. Movie,04/05/2023,76.0,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.0,7.244590e+08
3,Mummies,01/05/2023,70.0,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.0,3.420000e+07
4,Supercell,03/17/2023,61.0,Action,English,77000000.0,3.409420e+08
...,...,...,...,...,...,...,...
10173,20th Century Women,12/28/2016,73.0,Drama,English,7000000.0,9.353729e+06
10174,Delta Force 2: The Colombian Connection,08/24/1990,54.0,Action,English,9145817.8,6.698361e+06
10175,The Russia House,12/21/1990,61.0,"Drama, Thriller, Romance",English,21800000.0,2.299799e+07
10176,Darkman II: The Return of Durant,07/11/1995,55.0,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.0,4.756613e+08


In [4]:
#Remove duplicate movie names with the same year

cleaned_movies_df = edited_movies_df.drop_duplicates(subset=["Name","Date"],keep=False)
cleaned_movies_df.reset_index(drop=True, inplace = True)
cleaned_movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue
0,Creed III,03/02/2023,73.0,"Drama, Action",English,75000000.0,2.716167e+08
1,Avatar: The Way of Water,12/15/2022,78.0,"Science Fiction, Adventure, Action",English,460000000.0,2.316795e+09
2,The Super Mario Bros. Movie,04/05/2023,76.0,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.0,7.244590e+08
3,Mummies,01/05/2023,70.0,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.0,3.420000e+07
4,Supercell,03/17/2023,61.0,Action,English,77000000.0,3.409420e+08
...,...,...,...,...,...,...,...
9826,20th Century Women,12/28/2016,73.0,Drama,English,7000000.0,9.353729e+06
9827,Delta Force 2: The Colombian Connection,08/24/1990,54.0,Action,English,9145817.8,6.698361e+06
9828,The Russia House,12/21/1990,61.0,"Drama, Thriller, Romance",English,21800000.0,2.299799e+07
9829,Darkman II: The Return of Durant,07/11/1995,55.0,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.0,4.756613e+08


In [5]:
#Remove any non-applicable date from the Score, Budget, and Revenue columns 
cleaned_movies_df.loc[:, ['Score', 'Budget', 'Revenue']] = cleaned_movies_df[['Score', 'Budget', 'Revenue']].apply(pd.to_numeric, errors="coerce")
xtra_cleaned_movies = cleaned_movies_df.dropna()
xtra_cleaned_movies

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue
0,Creed III,03/02/2023,73.0,"Drama, Action",English,75000000.0,2.716167e+08
1,Avatar: The Way of Water,12/15/2022,78.0,"Science Fiction, Adventure, Action",English,460000000.0,2.316795e+09
2,The Super Mario Bros. Movie,04/05/2023,76.0,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.0,7.244590e+08
3,Mummies,01/05/2023,70.0,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.0,3.420000e+07
4,Supercell,03/17/2023,61.0,Action,English,77000000.0,3.409420e+08
...,...,...,...,...,...,...,...
9826,20th Century Women,12/28/2016,73.0,Drama,English,7000000.0,9.353729e+06
9827,Delta Force 2: The Colombian Connection,08/24/1990,54.0,Action,English,9145817.8,6.698361e+06
9828,The Russia House,12/21/1990,61.0,"Drama, Thriller, Romance",English,21800000.0,2.299799e+07
9829,Darkman II: The Return of Durant,07/11/1995,55.0,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.0,4.756613e+08


In [6]:
#Filter out movies that made zero revenue

revenue_only_df = xtra_cleaned_movies.loc[xtra_cleaned_movies["Revenue"] > 0]

revenue_only_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue
0,Creed III,03/02/2023,73.0,"Drama, Action",English,75000000.0,2.716167e+08
1,Avatar: The Way of Water,12/15/2022,78.0,"Science Fiction, Adventure, Action",English,460000000.0,2.316795e+09
2,The Super Mario Bros. Movie,04/05/2023,76.0,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.0,7.244590e+08
3,Mummies,01/05/2023,70.0,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.0,3.420000e+07
4,Supercell,03/17/2023,61.0,Action,English,77000000.0,3.409420e+08
...,...,...,...,...,...,...,...
9826,20th Century Women,12/28/2016,73.0,Drama,English,7000000.0,9.353729e+06
9827,Delta Force 2: The Colombian Connection,08/24/1990,54.0,Action,English,9145817.8,6.698361e+06
9828,The Russia House,12/21/1990,61.0,"Drama, Thriller, Romance",English,21800000.0,2.299799e+07
9829,Darkman II: The Return of Durant,07/11/1995,55.0,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.0,4.756613e+08


In [7]:
#Add a column for the profit each movie made
revenue_only_df['Profit'] = revenue_only_df['Revenue'] - revenue_only_df['Budget']

revenue_only_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  revenue_only_df['Profit'] = revenue_only_df['Revenue'] - revenue_only_df['Budget']


Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit
0,Creed III,03/02/2023,73.0,"Drama, Action",English,75000000.0,2.716167e+08,1.966167e+08
1,Avatar: The Way of Water,12/15/2022,78.0,"Science Fiction, Adventure, Action",English,460000000.0,2.316795e+09,1.856795e+09
2,The Super Mario Bros. Movie,04/05/2023,76.0,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.0,7.244590e+08,6.244590e+08
3,Mummies,01/05/2023,70.0,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.0,3.420000e+07,2.190000e+07
4,Supercell,03/17/2023,61.0,Action,English,77000000.0,3.409420e+08,2.639420e+08
...,...,...,...,...,...,...,...,...
9826,20th Century Women,12/28/2016,73.0,Drama,English,7000000.0,9.353729e+06,2.353729e+06
9827,Delta Force 2: The Colombian Connection,08/24/1990,54.0,Action,English,9145817.8,6.698361e+06,-2.447457e+06
9828,The Russia House,12/21/1990,61.0,"Drama, Thriller, Romance",English,21800000.0,2.299799e+07,1.197992e+06
9829,Darkman II: The Return of Durant,07/11/1995,55.0,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.0,4.756613e+08,3.596613e+08


In [8]:
# Convert the Date column to datetime format
revenue_only_df['Date'] = pd.to_datetime(revenue_only_df['Date'])

#Filter the data to the date range we're looking for
start_date = "1990-01-01"
end_date = "2019-12-31"
date_range_df = revenue_only_df.loc[(revenue_only_df["Date"] >= start_date) & (revenue_only_df["Date"] <= end_date)]

date_range_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  revenue_only_df['Date'] = pd.to_datetime(revenue_only_df['Date'])


Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit
22,The Passion of the Christ,2004-02-25,74.0,Drama,English,25000000.0,622313635.0,597313635.0
35,John Wick: Chapter 2,2017-05-18,73.0,"Action, Thriller, Crime",English,40000000.0,171539887.0,131539887.0
39,John Wick: Chapter 3 - Parabellum,2019-05-16,74.0,"Action, Thriller, Crime",English,55000000.0,326709727.0,271709727.0
58,A Bronx Tale,1994-05-20,78.0,"Drama, Crime",English,10000000.0,17287898.0,7287898.0
61,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.0,"Comedy, Drama, Romance",Cantonese,89600000.0,318375960.2,228775960.2
...,...,...,...,...,...,...,...,...
9824,The Seven Deadly Sins: Prisoners of the Sky,2018-08-18,76.0,"Action, Adventure, Fantasy, Animation",Japanese,62600000.0,706002070.8,643402070.8
9826,20th Century Women,2016-12-28,73.0,Drama,English,7000000.0,9353729.0,2353729.0
9827,Delta Force 2: The Colombian Connection,1990-08-24,54.0,Action,English,9145817.8,6698361.0,-2447456.8
9828,The Russia House,1990-12-21,61.0,"Drama, Thriller, Romance",English,21800000.0,22997992.0,1197992.0


In [9]:
#Check for excess symbols in the Genre column
date_range_df["Genre"].unique()

array(['Drama', 'Action,\xa0Thriller,\xa0Crime', 'Drama,\xa0Crime', ...,
       'History,\xa0Drama,\xa0Family',
       'Adventure,\xa0Comedy,\xa0Family,\xa0Science Fiction,\xa0Action',
       'Action,\xa0Adventure,\xa0Science Fiction,\xa0Thriller,\xa0Horror'],
      dtype=object)

In [10]:
# Remove the excess characters in the Genre column
date_range_df['Genre'] = date_range_df['Genre'].str.replace('\xa0', '')

date_range_df['Genre'].unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df['Genre'] = date_range_df['Genre'].str.replace('\xa0', '')


array(['Drama', 'Action,Thriller,Crime', 'Drama,Crime', ...,
       'History,Drama,Family',
       'Adventure,Comedy,Family,Science Fiction,Action',
       'Action,Adventure,Science Fiction,Thriller,Horror'], dtype=object)

In [11]:
# Initialize an empty set to store all unique genres
unique_genres = set()

# Iterate over the DataFrame rows
for index, row in date_range_df.iterrows():
    # Split the 'Genre' column by commas and add each genre to the set
    movie_genres = [genre.strip() for genre in row['Genre'].split(',')]
    unique_genres.update(movie_genres)

# Convert the set of unique genres to a list
unique_genres = list(unique_genres)

# Iterate over the DataFrame rows again to create genre columns
for index, row in date_range_df.iterrows():
    # Split the 'Genre' column by commas and create a dictionary to store genre values
    movie_genres = {genre: 1 if genre.strip() in row['Genre'] else 0 for genre in unique_genres}
    
    # Update the DataFrame row with genre values
    for genre, value in movie_genres.items():
        date_range_df.at[index, genre] = value

# Print the updated DataFrame
date_range_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df.at[index, genre] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df.at[index, genre] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df.at[index, genre] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,History,Animation,...,Music,Romance,Fantasy,Thriller,Action,War,Mystery,Western,Drama,TV Movie
22,The Passion of the Christ,2004-02-25,74.0,Drama,English,25000000.0,622313635.0,597313635.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
35,John Wick: Chapter 2,2017-05-18,73.0,"Action,Thriller,Crime",English,40000000.0,171539887.0,131539887.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
39,John Wick: Chapter 3 - Parabellum,2019-05-16,74.0,"Action,Thriller,Crime",English,55000000.0,326709727.0,271709727.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
58,A Bronx Tale,1994-05-20,78.0,"Drama,Crime",English,10000000.0,17287898.0,7287898.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
61,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.0,"Comedy,Drama,Romance",Cantonese,89600000.0,318375960.2,228775960.2,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9824,The Seven Deadly Sins: Prisoners of the Sky,2018-08-18,76.0,"Action,Adventure,Fantasy,Animation",Japanese,62600000.0,706002070.8,643402070.8,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
9826,20th Century Women,2016-12-28,73.0,Drama,English,7000000.0,9353729.0,2353729.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
9827,Delta Force 2: The Colombian Connection,1990-08-24,54.0,Action,English,9145817.8,6698361.0,-2447456.8,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
9828,The Russia House,1990-12-21,61.0,"Drama,Thriller,Romance",English,21800000.0,22997992.0,1197992.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [12]:
#Remove blank space in around the words in the Language column
date_range_df["Language"] = date_range_df["Language"].str.lstrip().str.rstrip()

#Create a data frame with only English Movies
English_Movies_df = date_range_df.loc[date_range_df["Language"] == "English"]

English_Movies_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df["Language"] = date_range_df["Language"].str.lstrip().str.rstrip()


Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,History,Animation,...,Music,Romance,Fantasy,Thriller,Action,War,Mystery,Western,Drama,TV Movie
22,The Passion of the Christ,2004-02-25,74.0,Drama,English,25000000.0,6.223136e+08,5.973136e+08,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
35,John Wick: Chapter 2,2017-05-18,73.0,"Action,Thriller,Crime",English,40000000.0,1.715399e+08,1.315399e+08,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
39,John Wick: Chapter 3 - Parabellum,2019-05-16,74.0,"Action,Thriller,Crime",English,55000000.0,3.267097e+08,2.717097e+08,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
58,A Bronx Tale,1994-05-20,78.0,"Drama,Crime",English,10000000.0,1.728790e+07,7.287898e+06,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
68,Avatar,2009-12-17,76.0,"Action,Adventure,Fantasy,Science Fiction",English,237000000.0,2.923706e+09,2.686706e+09,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9823,The Love Guru,2008-07-10,42.0,"Comedy,Romance",English,62000000.0,4.015902e+07,-2.184098e+07,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9826,20th Century Women,2016-12-28,73.0,Drama,English,7000000.0,9.353729e+06,2.353729e+06,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
9827,Delta Force 2: The Colombian Connection,1990-08-24,54.0,Action,English,9145817.8,6.698361e+06,-2.447457e+06,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
9828,The Russia House,1990-12-21,61.0,"Drama,Thriller,Romance",English,21800000.0,2.299799e+07,1.197992e+06,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [13]:
#Create a data frame with only Non-English Movies
Non_English_Movies_df = date_range_df.loc[date_range_df["Language"] != "English"]

Non_English_Movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,History,Animation,...,Music,Romance,Fantasy,Thriller,Action,War,Mystery,Western,Drama,TV Movie
61,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.0,"Comedy,Drama,Romance",Cantonese,89600000.0,318375960.2,228775960.2,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
107,Demon Slayer: Kimetsu no Yaiba Sibling's Bond,2019-03-29,80.0,"Action,Animation,Fantasy",Japanese,129000000.0,645532173.4,516532173.4,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
204,The Ten Commandments: The Movie,2016-01-28,72.0,Drama,Portuguese,39307171.0,37070177.0,-2236994.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
273,3-D Sex and Zen: Extreme Ecstasy,2011-04-14,48.0,"Drama,Adventure,Romance,Action,History",Cantonese,3500000.0,10308238.0,6808238.0,1.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
283,The Last: Naruto the Movie,2014-12-06,78.0,"Action,Romance,Animation",Japanese,903852.0,19840000.0,18936148.0,0.0,1.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9803,My Student's Mom 2,2017-11-09,10.0,Romance,Korean,201940000.0,38157314.0,-163782686.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9811,The Master,1992-05-28,64.0,Action,Cantonese,37500000.0,29044338.0,-8455662.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
9813,Miss Violence,2013-11-07,71.0,Drama,Greek,100000000.0,369861963.8,269861963.8,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
9819,On Your Wedding Day,2018-08-22,75.0,Romance,Korean,7397000.0,20895905.0,13498905.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
English_Movies_df.reset_index(drop=True, inplace=True)

English_Movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,History,Animation,...,Music,Romance,Fantasy,Thriller,Action,War,Mystery,Western,Drama,TV Movie
0,The Passion of the Christ,2004-02-25,74.0,Drama,English,25000000.0,6.223136e+08,5.973136e+08,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,John Wick: Chapter 2,2017-05-18,73.0,"Action,Thriller,Crime",English,40000000.0,1.715399e+08,1.315399e+08,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
2,John Wick: Chapter 3 - Parabellum,2019-05-16,74.0,"Action,Thriller,Crime",English,55000000.0,3.267097e+08,2.717097e+08,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
3,A Bronx Tale,1994-05-20,78.0,"Drama,Crime",English,10000000.0,1.728790e+07,7.287898e+06,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,Avatar,2009-12-17,76.0,"Action,Adventure,Fantasy,Science Fiction",English,237000000.0,2.923706e+09,2.686706e+09,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4770,The Love Guru,2008-07-10,42.0,"Comedy,Romance",English,62000000.0,4.015902e+07,-2.184098e+07,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4771,20th Century Women,2016-12-28,73.0,Drama,English,7000000.0,9.353729e+06,2.353729e+06,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4772,Delta Force 2: The Colombian Connection,1990-08-24,54.0,Action,English,9145817.8,6.698361e+06,-2.447457e+06,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4773,The Russia House,1990-12-21,61.0,"Drama,Thriller,Romance",English,21800000.0,2.299799e+07,1.197992e+06,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


In [15]:
Non_English_Movies_df.reset_index(drop=True, inplace=True)

Non_English_Movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,History,Animation,...,Music,Romance,Fantasy,Thriller,Action,War,Mystery,Western,Drama,TV Movie
0,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.0,"Comedy,Drama,Romance",Cantonese,89600000.0,318375960.2,228775960.2,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,Demon Slayer: Kimetsu no Yaiba Sibling's Bond,2019-03-29,80.0,"Action,Animation,Fantasy",Japanese,129000000.0,645532173.4,516532173.4,0.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,The Ten Commandments: The Movie,2016-01-28,72.0,Drama,Portuguese,39307171.0,37070177.0,-2236994.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,3-D Sex and Zen: Extreme Ecstasy,2011-04-14,48.0,"Drama,Adventure,Romance,Action,History",Cantonese,3500000.0,10308238.0,6808238.0,1.0,0.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
4,The Last: Naruto the Movie,2014-12-06,78.0,"Action,Romance,Animation",Japanese,903852.0,19840000.0,18936148.0,0.0,1.0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1472,My Student's Mom 2,2017-11-09,10.0,Romance,Korean,201940000.0,38157314.0,-163782686.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1473,The Master,1992-05-28,64.0,Action,Cantonese,37500000.0,29044338.0,-8455662.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1474,Miss Violence,2013-11-07,71.0,Drama,Greek,100000000.0,369861963.8,269861963.8,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1475,On Your Wedding Day,2018-08-22,75.0,Romance,Korean,7397000.0,20895905.0,13498905.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
# Export the final data frames, English and Non English into csv files
English_Movies_Data = English_Movies_df.to_csv("English_Movies_Data.csv", index=False)

Non_English_Movies_Data = Non_English_Movies_df.to_csv("Non_English_Movies_Data.csv", index=False)