In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Load the source data file and read and display it with Pandas

movie_data = Path("imdb_movies.csv")
movie_data_df = pd.read_csv(movie_data)
movie_data_df

# Set global float formatting to make the columns with dollar amounts easier to read
pd.options.display.float_format = '{:,.2f}'.format

In [3]:
#Edit down the columns to the ones we need
edited_movies_df = movie_data_df[["names", "date_x", "score", "genre", "orig_lang", "budget_x", "revenue"]]

#And rename them to be more user friendly

edited_movies_df = edited_movies_df.rename(columns={"date_x":"Date",
                                                        "budget_x":"Budget",
                                                        "names":"Name",
                                                        "score":"Score",
                                                        "genre":"Genre",
                                                        "revenue":"Revenue",
                                                        "orig_lang":"Language"
                                                       })

#Edit the "Year" column to remove the day and month
# edited_movies_df.loc[:,"Year"]=pd.to_datetime(edited_movies_df["Year"], errors='coerce').dt.year.astype(int)

edited_movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue
0,Creed III,03/02/2023,73.00,"Drama, Action",English,75000000.00,271616668.00
1,Avatar: The Way of Water,12/15/2022,78.00,"Science Fiction, Adventure, Action",English,460000000.00,2316794914.00
2,The Super Mario Bros. Movie,04/05/2023,76.00,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.00,724459031.00
3,Mummies,01/05/2023,70.00,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.00,34200000.00
4,Supercell,03/17/2023,61.00,Action,English,77000000.00,340941958.60
...,...,...,...,...,...,...,...
10173,20th Century Women,12/28/2016,73.00,Drama,English,7000000.00,9353729.00
10174,Delta Force 2: The Colombian Connection,08/24/1990,54.00,Action,English,9145817.80,6698361.00
10175,The Russia House,12/21/1990,61.00,"Drama, Thriller, Romance",English,21800000.00,22997992.00
10176,Darkman II: The Return of Durant,07/11/1995,55.00,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.00,475661306.00


In [4]:
#Remove duplicate movie names with the same year

cleaned_movies_df = edited_movies_df.drop_duplicates(subset=["Name","Date"],keep=False)
cleaned_movies_df.reset_index(drop=True, inplace = True)
cleaned_movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue
0,Creed III,03/02/2023,73.00,"Drama, Action",English,75000000.00,271616668.00
1,Avatar: The Way of Water,12/15/2022,78.00,"Science Fiction, Adventure, Action",English,460000000.00,2316794914.00
2,The Super Mario Bros. Movie,04/05/2023,76.00,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.00,724459031.00
3,Mummies,01/05/2023,70.00,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.00,34200000.00
4,Supercell,03/17/2023,61.00,Action,English,77000000.00,340941958.60
...,...,...,...,...,...,...,...
9826,20th Century Women,12/28/2016,73.00,Drama,English,7000000.00,9353729.00
9827,Delta Force 2: The Colombian Connection,08/24/1990,54.00,Action,English,9145817.80,6698361.00
9828,The Russia House,12/21/1990,61.00,"Drama, Thriller, Romance",English,21800000.00,22997992.00
9829,Darkman II: The Return of Durant,07/11/1995,55.00,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.00,475661306.00


In [5]:
#Remove any non-applicable date from the Score, Budget, and Revenue columns 
cleaned_movies_df.loc[:, ['Score', 'Budget', 'Revenue']] = cleaned_movies_df[['Score', 'Budget', 'Revenue']].apply(pd.to_numeric, errors="coerce")
xtra_cleaned_movies = cleaned_movies_df.dropna()
xtra_cleaned_movies

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue
0,Creed III,03/02/2023,73.00,"Drama, Action",English,75000000.00,271616668.00
1,Avatar: The Way of Water,12/15/2022,78.00,"Science Fiction, Adventure, Action",English,460000000.00,2316794914.00
2,The Super Mario Bros. Movie,04/05/2023,76.00,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.00,724459031.00
3,Mummies,01/05/2023,70.00,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.00,34200000.00
4,Supercell,03/17/2023,61.00,Action,English,77000000.00,340941958.60
...,...,...,...,...,...,...,...
9826,20th Century Women,12/28/2016,73.00,Drama,English,7000000.00,9353729.00
9827,Delta Force 2: The Colombian Connection,08/24/1990,54.00,Action,English,9145817.80,6698361.00
9828,The Russia House,12/21/1990,61.00,"Drama, Thriller, Romance",English,21800000.00,22997992.00
9829,Darkman II: The Return of Durant,07/11/1995,55.00,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.00,475661306.00


In [6]:
#Filter out movies that made zero revenue

revenue_only_df = xtra_cleaned_movies.loc[xtra_cleaned_movies["Revenue"] > 0]

revenue_only_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue
0,Creed III,03/02/2023,73.00,"Drama, Action",English,75000000.00,271616668.00
1,Avatar: The Way of Water,12/15/2022,78.00,"Science Fiction, Adventure, Action",English,460000000.00,2316794914.00
2,The Super Mario Bros. Movie,04/05/2023,76.00,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.00,724459031.00
3,Mummies,01/05/2023,70.00,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.00,34200000.00
4,Supercell,03/17/2023,61.00,Action,English,77000000.00,340941958.60
...,...,...,...,...,...,...,...
9826,20th Century Women,12/28/2016,73.00,Drama,English,7000000.00,9353729.00
9827,Delta Force 2: The Colombian Connection,08/24/1990,54.00,Action,English,9145817.80,6698361.00
9828,The Russia House,12/21/1990,61.00,"Drama, Thriller, Romance",English,21800000.00,22997992.00
9829,Darkman II: The Return of Durant,07/11/1995,55.00,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.00,475661306.00


In [7]:
#Add a column for the profit each movie made
revenue_only_df['Profit'] = revenue_only_df['Revenue'] - revenue_only_df['Budget']

revenue_only_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  revenue_only_df['Profit'] = revenue_only_df['Revenue'] - revenue_only_df['Budget']


Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit
0,Creed III,03/02/2023,73.00,"Drama, Action",English,75000000.00,271616668.00,196616668.00
1,Avatar: The Way of Water,12/15/2022,78.00,"Science Fiction, Adventure, Action",English,460000000.00,2316794914.00,1856794914.00
2,The Super Mario Bros. Movie,04/05/2023,76.00,"Animation, Adventure, Family, Fantasy, Comedy",English,100000000.00,724459031.00,624459031.00
3,Mummies,01/05/2023,70.00,"Animation, Comedy, Family, Adventure, Fantasy","Spanish, Castilian",12300000.00,34200000.00,21900000.00
4,Supercell,03/17/2023,61.00,Action,English,77000000.00,340941958.60,263941958.60
...,...,...,...,...,...,...,...,...
9826,20th Century Women,12/28/2016,73.00,Drama,English,7000000.00,9353729.00,2353729.00
9827,Delta Force 2: The Colombian Connection,08/24/1990,54.00,Action,English,9145817.80,6698361.00,-2447456.80
9828,The Russia House,12/21/1990,61.00,"Drama, Thriller, Romance",English,21800000.00,22997992.00,1197992.00
9829,Darkman II: The Return of Durant,07/11/1995,55.00,"Action, Adventure, Science Fiction, Thriller, ...",English,116000000.00,475661306.00,359661306.00


In [10]:
# Convert the 'date' column to datetime format
revenue_only_df['Date'] = pd.to_datetime(revenue_only_df['Date'])

# Filter out rows with invalid dates
revenue_only_df = revenue_only_df.dropna(subset=["Date"])

# Filter the DataFrame to the years we're looking for

# Filter the DataFrame based on a specific date range
start_date = pd.to_datetime('1990-01-01')
end_date = pd.to_datetime('2019-12-31')

date_range_df = revenue_only_df[(revenue_only_df['Date'] >= start_date) & (revenue_only_df['Date'] <= end_date)]

date_range_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit
22,The Passion of the Christ,2004-02-25,74.00,Drama,English,25000000.00,622313635.00,597313635.00
35,John Wick: Chapter 2,2017-05-18,73.00,"Action, Thriller, Crime",English,40000000.00,171539887.00,131539887.00
39,John Wick: Chapter 3 - Parabellum,2019-05-16,74.00,"Action, Thriller, Crime",English,55000000.00,326709727.00,271709727.00
58,A Bronx Tale,1994-05-20,78.00,"Drama, Crime",English,10000000.00,17287898.00,7287898.00
61,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.00,"Comedy, Drama, Romance",Cantonese,89600000.00,318375960.20,228775960.20
...,...,...,...,...,...,...,...,...
9824,The Seven Deadly Sins: Prisoners of the Sky,2018-08-18,76.00,"Action, Adventure, Fantasy, Animation",Japanese,62600000.00,706002070.80,643402070.80
9826,20th Century Women,2016-12-28,73.00,Drama,English,7000000.00,9353729.00,2353729.00
9827,Delta Force 2: The Colombian Connection,1990-08-24,54.00,Action,English,9145817.80,6698361.00,-2447456.80
9828,The Russia House,1990-12-21,61.00,"Drama, Thriller, Romance",English,21800000.00,22997992.00,1197992.00


In [11]:
#Check for excess symbols in the Genre column
date_range_df["Genre"].unique()

array(['Drama', 'Action,\xa0Thriller,\xa0Crime', 'Drama,\xa0Crime', ...,
       'History,\xa0Drama,\xa0Family',
       'Adventure,\xa0Comedy,\xa0Family,\xa0Science Fiction,\xa0Action',
       'Action,\xa0Adventure,\xa0Science Fiction,\xa0Thriller,\xa0Horror'],
      dtype=object)

In [12]:
# Remove the excess characters in the Genre column
date_range_df['Genre'] = date_range_df['Genre'].str.replace('\xa0', '')

date_range_df['Genre'].unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df['Genre'] = date_range_df['Genre'].str.replace('\xa0', '')


array(['Drama', 'Action,Thriller,Crime', 'Drama,Crime', ...,
       'History,Drama,Family',
       'Adventure,Comedy,Family,Science Fiction,Action',
       'Action,Adventure,Science Fiction,Thriller,Horror'], dtype=object)

In [13]:
# Initialize an empty set to store all unique genres
unique_genres = set()

# Iterate over the DataFrame rows
for index, row in date_range_df.iterrows():
    # Split the 'Genre' column by commas and add each genre to the set
    movie_genres = [genre.strip() for genre in row['Genre'].split(',')]
    unique_genres.update(movie_genres)

# Convert the set of unique genres to a list
unique_genres = list(unique_genres)

# Iterate over the DataFrame rows again to create genre columns
for index, row in date_range_df.iterrows():
    # Split the 'Genre' column by commas and create a dictionary to store genre values
    movie_genres = {genre: 1 if genre.strip() in row['Genre'] else 0 for genre in unique_genres}
    
    # Update the DataFrame row with genre values
    for genre, value in movie_genres.items():
        date_range_df.at[index, genre] = value

# Print the updated DataFrame
date_range_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df.at[index, genre] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df.at[index, genre] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df.at[index, genre] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_in

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,Animation,Family,...,Mystery,Fantasy,History,Romance,Documentary,Comedy,War,Drama,Crime,Thriller
22,The Passion of the Christ,2004-02-25,74.00,Drama,English,25000000.00,622313635.00,597313635.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00
35,John Wick: Chapter 2,2017-05-18,73.00,"Action,Thriller,Crime",English,40000000.00,171539887.00,131539887.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00
39,John Wick: Chapter 3 - Parabellum,2019-05-16,74.00,"Action,Thriller,Crime",English,55000000.00,326709727.00,271709727.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00
58,A Bronx Tale,1994-05-20,78.00,"Drama,Crime",English,10000000.00,17287898.00,7287898.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,0.00
61,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.00,"Comedy,Drama,Romance",Cantonese,89600000.00,318375960.20,228775960.20,0.00,0.00,...,0.00,0.00,0.00,1.00,0.00,1.00,0.00,1.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9824,The Seven Deadly Sins: Prisoners of the Sky,2018-08-18,76.00,"Action,Adventure,Fantasy,Animation",Japanese,62600000.00,706002070.80,643402070.80,1.00,0.00,...,0.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
9826,20th Century Women,2016-12-28,73.00,Drama,English,7000000.00,9353729.00,2353729.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00
9827,Delta Force 2: The Colombian Connection,1990-08-24,54.00,Action,English,9145817.80,6698361.00,-2447456.80,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
9828,The Russia House,1990-12-21,61.00,"Drama,Thriller,Romance",English,21800000.00,22997992.00,1197992.00,0.00,0.00,...,0.00,0.00,0.00,1.00,0.00,0.00,0.00,1.00,0.00,1.00


In [14]:
#Find how many different genres each movie has so we can disperse the dollar amounts evenly and add those values to column
date_range_df['Genre Sum'] = date_range_df[unique_genres].sum(axis=1)

date_range_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df['Genre Sum'] = date_range_df[unique_genres].sum(axis=1)


Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,Animation,Family,...,Fantasy,History,Romance,Documentary,Comedy,War,Drama,Crime,Thriller,Genre Sum
22,The Passion of the Christ,2004-02-25,74.00,Drama,English,25000000.00,622313635.00,597313635.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
35,John Wick: Chapter 2,2017-05-18,73.00,"Action,Thriller,Crime",English,40000000.00,171539887.00,131539887.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,3.00
39,John Wick: Chapter 3 - Parabellum,2019-05-16,74.00,"Action,Thriller,Crime",English,55000000.00,326709727.00,271709727.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,3.00
58,A Bronx Tale,1994-05-20,78.00,"Drama,Crime",English,10000000.00,17287898.00,7287898.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,0.00,2.00
61,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.00,"Comedy,Drama,Romance",Cantonese,89600000.00,318375960.20,228775960.20,0.00,0.00,...,0.00,0.00,1.00,0.00,1.00,0.00,1.00,0.00,0.00,3.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9824,The Seven Deadly Sins: Prisoners of the Sky,2018-08-18,76.00,"Action,Adventure,Fantasy,Animation",Japanese,62600000.00,706002070.80,643402070.80,1.00,0.00,...,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,4.00
9826,20th Century Women,2016-12-28,73.00,Drama,English,7000000.00,9353729.00,2353729.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
9827,Delta Force 2: The Colombian Connection,1990-08-24,54.00,Action,English,9145817.80,6698361.00,-2447456.80,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00
9828,The Russia House,1990-12-21,61.00,"Drama,Thriller,Romance",English,21800000.00,22997992.00,1197992.00,0.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,1.00,0.00,1.00,3.00


In [15]:
#Remove blank space in around the words in the Language column
date_range_df["Language"] = date_range_df["Language"].str.lstrip().str.rstrip()

#Create a data frame with only English Movies
English_Movies_df = date_range_df.loc[date_range_df["Language"] == "English"]

English_Movies_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  date_range_df["Language"] = date_range_df["Language"].str.lstrip().str.rstrip()


Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,Animation,Family,...,Fantasy,History,Romance,Documentary,Comedy,War,Drama,Crime,Thriller,Genre Sum
22,The Passion of the Christ,2004-02-25,74.00,Drama,English,25000000.00,622313635.00,597313635.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
35,John Wick: Chapter 2,2017-05-18,73.00,"Action,Thriller,Crime",English,40000000.00,171539887.00,131539887.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,3.00
39,John Wick: Chapter 3 - Parabellum,2019-05-16,74.00,"Action,Thriller,Crime",English,55000000.00,326709727.00,271709727.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,3.00
58,A Bronx Tale,1994-05-20,78.00,"Drama,Crime",English,10000000.00,17287898.00,7287898.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,0.00,2.00
68,Avatar,2009-12-17,76.00,"Action,Adventure,Fantasy,Science Fiction",English,237000000.00,2923706026.00,2686706026.00,0.00,0.00,...,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,4.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9823,The Love Guru,2008-07-10,42.00,"Comedy,Romance",English,62000000.00,40159017.00,-21840983.00,0.00,0.00,...,0.00,0.00,1.00,0.00,1.00,0.00,0.00,0.00,0.00,2.00
9826,20th Century Women,2016-12-28,73.00,Drama,English,7000000.00,9353729.00,2353729.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
9827,Delta Force 2: The Colombian Connection,1990-08-24,54.00,Action,English,9145817.80,6698361.00,-2447456.80,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00
9828,The Russia House,1990-12-21,61.00,"Drama,Thriller,Romance",English,21800000.00,22997992.00,1197992.00,0.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,1.00,0.00,1.00,3.00


In [16]:
#Create a data frame with only Non-English Movies
Non_English_Movies_df = date_range_df.loc[date_range_df["Language"] != "English"]

Non_English_Movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,Animation,Family,...,Fantasy,History,Romance,Documentary,Comedy,War,Drama,Crime,Thriller,Genre Sum
61,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.00,"Comedy,Drama,Romance",Cantonese,89600000.00,318375960.20,228775960.20,0.00,0.00,...,0.00,0.00,1.00,0.00,1.00,0.00,1.00,0.00,0.00,3.00
107,Demon Slayer: Kimetsu no Yaiba Sibling's Bond,2019-03-29,80.00,"Action,Animation,Fantasy",Japanese,129000000.00,645532173.40,516532173.40,1.00,0.00,...,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,3.00
204,The Ten Commandments: The Movie,2016-01-28,72.00,Drama,Portuguese,39307171.00,37070177.00,-2236994.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
273,3-D Sex and Zen: Extreme Ecstasy,2011-04-14,48.00,"Drama,Adventure,Romance,Action,History",Cantonese,3500000.00,10308238.00,6808238.00,0.00,0.00,...,0.00,1.00,1.00,0.00,0.00,0.00,1.00,0.00,0.00,5.00
283,The Last: Naruto the Movie,2014-12-06,78.00,"Action,Romance,Animation",Japanese,903852.00,19840000.00,18936148.00,1.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,3.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9803,My Student's Mom 2,2017-11-09,10.00,Romance,Korean,201940000.00,38157314.00,-163782686.00,0.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00
9811,The Master,1992-05-28,64.00,Action,Cantonese,37500000.00,29044338.00,-8455662.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00
9813,Miss Violence,2013-11-07,71.00,Drama,Greek,100000000.00,369861963.80,269861963.80,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
9819,On Your Wedding Day,2018-08-22,75.00,Romance,Korean,7397000.00,20895905.00,13498905.00,0.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00


In [17]:
English_Movies_df.reset_index(drop=True, inplace=True)

English_Movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,Animation,Family,...,Fantasy,History,Romance,Documentary,Comedy,War,Drama,Crime,Thriller,Genre Sum
0,The Passion of the Christ,2004-02-25,74.00,Drama,English,25000000.00,622313635.00,597313635.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
1,John Wick: Chapter 2,2017-05-18,73.00,"Action,Thriller,Crime",English,40000000.00,171539887.00,131539887.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,3.00
2,John Wick: Chapter 3 - Parabellum,2019-05-16,74.00,"Action,Thriller,Crime",English,55000000.00,326709727.00,271709727.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,3.00
3,A Bronx Tale,1994-05-20,78.00,"Drama,Crime",English,10000000.00,17287898.00,7287898.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,1.00,0.00,2.00
4,Avatar,2009-12-17,76.00,"Action,Adventure,Fantasy,Science Fiction",English,237000000.00,2923706026.00,2686706026.00,0.00,0.00,...,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,4.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4770,The Love Guru,2008-07-10,42.00,"Comedy,Romance",English,62000000.00,40159017.00,-21840983.00,0.00,0.00,...,0.00,0.00,1.00,0.00,1.00,0.00,0.00,0.00,0.00,2.00
4771,20th Century Women,2016-12-28,73.00,Drama,English,7000000.00,9353729.00,2353729.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
4772,Delta Force 2: The Colombian Connection,1990-08-24,54.00,Action,English,9145817.80,6698361.00,-2447456.80,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00
4773,The Russia House,1990-12-21,61.00,"Drama,Thriller,Romance",English,21800000.00,22997992.00,1197992.00,0.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,1.00,0.00,1.00,3.00


In [18]:
Non_English_Movies_df.reset_index(drop=True, inplace=True)

Non_English_Movies_df

Unnamed: 0,Name,Date,Score,Genre,Language,Budget,Revenue,Profit,Animation,Family,...,Fantasy,History,Romance,Documentary,Comedy,War,Drama,Crime,Thriller,Genre Sum
0,The Forbidden Legend: Sex & Chopsticks 2,2009-03-04,60.00,"Comedy,Drama,Romance",Cantonese,89600000.00,318375960.20,228775960.20,0.00,0.00,...,0.00,0.00,1.00,0.00,1.00,0.00,1.00,0.00,0.00,3.00
1,Demon Slayer: Kimetsu no Yaiba Sibling's Bond,2019-03-29,80.00,"Action,Animation,Fantasy",Japanese,129000000.00,645532173.40,516532173.40,1.00,0.00,...,1.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,3.00
2,The Ten Commandments: The Movie,2016-01-28,72.00,Drama,Portuguese,39307171.00,37070177.00,-2236994.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
3,3-D Sex and Zen: Extreme Ecstasy,2011-04-14,48.00,"Drama,Adventure,Romance,Action,History",Cantonese,3500000.00,10308238.00,6808238.00,0.00,0.00,...,0.00,1.00,1.00,0.00,0.00,0.00,1.00,0.00,0.00,5.00
4,The Last: Naruto the Movie,2014-12-06,78.00,"Action,Romance,Animation",Japanese,903852.00,19840000.00,18936148.00,1.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,3.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1472,My Student's Mom 2,2017-11-09,10.00,Romance,Korean,201940000.00,38157314.00,-163782686.00,0.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00
1473,The Master,1992-05-28,64.00,Action,Cantonese,37500000.00,29044338.00,-8455662.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00
1474,Miss Violence,2013-11-07,71.00,Drama,Greek,100000000.00,369861963.80,269861963.80,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1.00
1475,On Your Wedding Day,2018-08-22,75.00,Romance,Korean,7397000.00,20895905.00,13498905.00,0.00,0.00,...,0.00,0.00,1.00,0.00,0.00,0.00,0.00,0.00,0.00,1.00


In [19]:
# Export the final data frames, English and Non English into csv files
English_Movies_Data = English_Movies_df.to_csv("English_Movies_Data1.csv", index=False)

Non_English_Movies_Data = Non_English_Movies_df.to_csv("Non_English_Movies_Data1.csv", index=False)