In [2]:
import pandas as pd 
import numpy as np 
import re 
import matplotlib.pyplot as plt 
from bs4 import BeautifulSoup 
from datetime import datetime
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import plotly.express as px

In [3]:
df = pd.read_csv("data/TMDB_movie_dataset_v11.csv", encoding='utf-8')

In [4]:
df.columns

Index(['id', 'title', 'vote_average', 'vote_count', 'status', 'release_date',
       'revenue', 'runtime', 'adult', 'backdrop_path', 'budget', 'homepage',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'tagline', 'genres',
       'production_companies', 'production_countries', 'spoken_languages',
       'keywords'],
      dtype='object')

In [5]:
df_relevant = df[['title', 'release_date', 'vote_average','revenue','overview']]

In [6]:
df_relevant.head(5)
df_relevant['release_date'] = pd.to_datetime(df_relevant['release_date'], format='%Y-%m-%d')

# Add a new column 'year' with the year extracted from the date
df_relevant['year'] = df_relevant['release_date'].dt.year

df_relevant = df_relevant.drop(columns = 'release_date')

df_relevant['revenue'] = df_relevant['revenue'].replace(0, np.nan)

df_relevant['vote_average'] = df_relevant['vote_average'].replace(0, np.nan)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_relevant['release_date'] = pd.to_datetime(df_relevant['release_date'], format='%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_relevant['year'] = df_relevant['release_date'].dt.year


In [7]:
df_relevant.head(5)


Unnamed: 0,title,vote_average,revenue,year
0,Inception,8.364,825532800.0,2010.0
1,Interstellar,8.417,701729200.0,2014.0
2,The Dark Knight,8.512,1004558000.0,2008.0
3,Avatar,7.573,2923706000.0,2009.0
4,The Avengers,7.71,1518816000.0,2012.0


In [8]:
df_movies=pd.read_csv("generated/" + "cleaned_data.csv",index_col = 'Wikipedia movie ID')

In [9]:
df_test = df_movies.head(10)

In [10]:
df_merged = pd.merge(
    df_relevant,
    df_test,
    left_on=['year', 'title'],
    right_on=['Movie release year', 'Movie name'],
    how='right'
)
df_merged.index = df_test.index

df_merged.drop(columns = ['title','year'])
df_merged.head(10)

Unnamed: 0_level_0,title,vote_average,revenue,year,Movie name,Movie release month,Movie release year,Movie release date,Movie box office revenue,Movie runtime,Movie languages,Movie countries,Movie genres,Summary,Sentiment,Compound Score
Wikipedia movie ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
975900,Ghosts of Mars,5.127,14010832.0,2001.0,Ghosts of Mars,8.0,2001.0,2001-08-24,24771180.0,98.0,['English'],['United States of America'],"['Thriller', 'Science Fiction', 'Horror', 'Adv...","Set in the second half of the 22nd century, th...",negative,-0.9913
3196793,Getting Away with Murder: The JonBenét Ramsey ...,,,2000.0,Getting Away with Murder: The JonBenét Ramsey ...,2.0,2000.0,2000-02-16,,95.0,['English'],['United States of America'],"['Mystery', 'Biographical film', 'Drama', 'Cri...",,,
28463795,,,,,Brun bitter,,1988.0,,,83.0,['Norwegian'],['Norway'],"['Crime Fiction', 'Drama']",,,
9363483,,,,,White Of The Eye,,1987.0,,,110.0,['English'],['United Kingdom'],"['Thriller', 'Erotic thriller', 'Psychological...",A series of murders of rich young women throug...,negative,-0.9983
261236,A Woman in Flames,5.3,,1983.0,A Woman in Flames,,1983.0,,,106.0,['German'],['Germany'],['Drama'],"Eva, an upper class housewife, becomes frustra...",positive,0.9604
13696889,The Gangsters,6.0,,1913.0,The Gangsters,5.0,1913.0,1913-05-29,,35.0,"['Silent film', 'English']",['United States of America'],"['Short Film', 'Silent film', 'Indie', 'Black-...",,,
18998739,The Sorcerer's Apprentice,4.6,,2002.0,The Sorcerer's Apprentice,,2002.0,,,86.0,['English'],['South Africa'],"['Family Film', 'Fantasy', 'Adventure', 'World...","Every hundred years, the evil Morgana returns...",negative,-0.8885
10408933,Alexander's Ragtime Band,6.6,4000000.0,1938.0,Alexander's Ragtime Band,8.0,1938.0,1938-08-16,76195730.0,106.0,['English'],['United States of America'],"['Musical', 'Comedy', 'Black-and-white']",,,
9997961,Contigo y aquí,,,1974.0,Contigo y aquí,,1974.0,,,,['Spanish'],['Argentina'],"['Musical', 'Drama', 'Comedy']",,,
2345652,,,,,City of the Dead,,1960.0,,,76.0,['English'],['United Kingdom'],"['Horror', 'Supernatural']",,,


In [11]:
inflation_data = {
    1914: 1.3, 1915: 0.9, 1916: 7.7, 1917: 17.8, 1918: 17.3, 
    1919: 15.2, 1920: 15.6, 1921: -10.9, 1922: -6.2, 1923: 1.8, 
    1924: 0.4, 1925: 2.4, 1926: 0.9, 1927: -1.9, 1928: -1.2, 
    1929: 0.0, 1930: -2.7, 1931: -8.9, 1932: -10.3, 1933: -5.2, 
    1934: 3.5, 1935: 2.6, 1936: 1.0, 1937: 3.7, 1938: -2.0, 
    1939: -1.3, 1940: 0.7, 1941: 5.1, 1942: 10.9, 1943: 6.0, 
    1944: 1.6, 1945: 2.3, 1946: 8.5, 1947: 14.4, 1948: 7.7, 
    1949: -1.0, 1950: 1.1, 1951: 7.9, 1952: 2.3, 1953: 0.8, 
    1954: 0.3, 1955: -0.3, 1956: 1.5, 1957: 3.3, 1958: 2.7, 
    1959: 1.08, 1960: 1.5, 1961: 1.1, 1962: 1.2, 1963: 1.2, 
    1964: 1.3, 1965: 1.6, 1966: 3.0, 1967: 2.8, 1968: 4.3, 
    1969: 5.5, 1970: 5.8, 1971: 4.3, 1972: 3.3, 1973: 6.2, 
    1974: 11.1, 1975: 9.1, 1976: 5.7, 1977: 6.5, 1978: 7.6, 
    1979: 11.3, 1980: 13.5, 1981: 10.3, 1982: 6.1, 1983: 3.2, 
    1984: 4.3, 1985: 3.5, 1986: 1.9, 1987: 3.7, 1988: 4.1, 
    1989: 4.8, 1990: 5.4, 1991: 4.2, 1992: 3.0, 1993: 3.0, 
    1994: 2.6, 1995: 2.8, 1996: 2.9, 1997: 2.3, 1998: 1.6, 
    1999: 2.2, 2000: 3.4, 2001: 2.8, 2002: 1.6, 2003: 2.3, 
    2004: 2.7, 2005: 3.4, 2006: 3.2, 2007: 2.9, 2008: 3.8, 
    2009: -0.4, 2010: 1.6, 2011: 3.2, 2012: 2.1, 2013: 1.5, 
    2014: 1.6, 2015: 0.1, 2016: 1.3, 2017: 2.1, 2018: 2.4, 
    2019: 1.8, 2020: 1.2, 2021: 4.7, 2022: 8.0, 2023: 4.1, 2024: 3.2
}

#source : minesota website

def adjust_for_inflation(year, amount, inflation_data):
    """
    Adjusts the given amount from the provided year to 2024 based on annual inflation rates.

    Parameters:
    year (int): The starting year.
    amount (float): The amount to be adjusted.
    inflation_data (dict): A dictionary with years as keys and inflation rates as values.

    Returns:
    float: The inflation-adjusted amount for 2024.
    """
    adjusted_value = amount
    
    for y in range(year.astype(int), 2024):
        if y in inflation_data:
            inflation_rate = inflation_data[y]
            # Adjust for inflation for each year
            adjusted_value *= (1 + inflation_rate / 100)
    return adjusted_value


for i in range(len(df_merged)):
    if pd.notna(df_merged.iloc[i]['Movie release year']) and pd.notna(df_merged.iloc[i]['revenue']):
        release_year = df_merged['Movie release year'].iloc[i]
        
        adjusted_revenue = adjust_for_inflation(release_year, df_merged.iloc[i]['revenue'], inflation_data)
        
        df_merged.iloc[i, df_merged.columns.get_loc('revenue')] = adjusted_revenue
 


Unnamed: 0_level_0,title,vote_average,revenue,year,Movie name,Movie release month,Movie release year,Movie release date,Movie box office revenue,Movie runtime,Movie languages,Movie countries,Movie genres,Summary,Sentiment,Compound Score
Wikipedia movie ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
975900,Ghosts of Mars,5.127,24771180.0,2001.0,Ghosts of Mars,8.0,2001.0,2001-08-24,24771180.0,98.0,['English'],['United States of America'],"['Thriller', 'Science Fiction', 'Horror', 'Adv...","Set in the second half of the 22nd century, th...",negative,-0.9913
3196793,Getting Away with Murder: The JonBenét Ramsey ...,,,2000.0,Getting Away with Murder: The JonBenét Ramsey ...,2.0,2000.0,2000-02-16,,95.0,['English'],['United States of America'],"['Mystery', 'Biographical film', 'Drama', 'Cri...",,,
28463795,,,,,Brun bitter,,1988.0,,,83.0,['Norwegian'],['Norway'],"['Crime Fiction', 'Drama']",,,
9363483,,,,,White Of The Eye,,1987.0,,,110.0,['English'],['United Kingdom'],"['Thriller', 'Erotic thriller', 'Psychological...",A series of murders of rich young women throug...,negative,-0.9983
261236,A Woman in Flames,5.3,,1983.0,A Woman in Flames,,1983.0,,,106.0,['German'],['Germany'],['Drama'],"Eva, an upper class housewife, becomes frustra...",positive,0.9604


In [14]:
df_merged.head(10)

Unnamed: 0_level_0,title,vote_average,revenue,year,Movie name,Movie release month,Movie release year,Movie release date,Movie box office revenue,Movie runtime,Movie languages,Movie countries,Movie genres,Summary,Sentiment,Compound Score
Wikipedia movie ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
975900,Ghosts of Mars,5.127,24771180.0,2001.0,Ghosts of Mars,8.0,2001.0,2001-08-24,24771180.0,98.0,['English'],['United States of America'],"['Thriller', 'Science Fiction', 'Horror', 'Adv...","Set in the second half of the 22nd century, th...",negative,-0.9913
3196793,Getting Away with Murder: The JonBenét Ramsey ...,,,2000.0,Getting Away with Murder: The JonBenét Ramsey ...,2.0,2000.0,2000-02-16,,95.0,['English'],['United States of America'],"['Mystery', 'Biographical film', 'Drama', 'Cri...",,,
28463795,,,,,Brun bitter,,1988.0,,,83.0,['Norwegian'],['Norway'],"['Crime Fiction', 'Drama']",,,
9363483,,,,,White Of The Eye,,1987.0,,,110.0,['English'],['United Kingdom'],"['Thriller', 'Erotic thriller', 'Psychological...",A series of murders of rich young women throug...,negative,-0.9983
261236,A Woman in Flames,5.3,,1983.0,A Woman in Flames,,1983.0,,,106.0,['German'],['Germany'],['Drama'],"Eva, an upper class housewife, becomes frustra...",positive,0.9604
13696889,The Gangsters,6.0,,1913.0,The Gangsters,5.0,1913.0,1913-05-29,,35.0,"['Silent film', 'English']",['United States of America'],"['Short Film', 'Silent film', 'Indie', 'Black-...",,,
18998739,The Sorcerer's Apprentice,4.6,,2002.0,The Sorcerer's Apprentice,,2002.0,,,86.0,['English'],['South Africa'],"['Family Film', 'Fantasy', 'Adventure', 'World...","Every hundred years, the evil Morgana returns...",negative,-0.8885
10408933,Alexander's Ragtime Band,6.6,84661920.0,1938.0,Alexander's Ragtime Band,8.0,1938.0,1938-08-16,76195730.0,106.0,['English'],['United States of America'],"['Musical', 'Comedy', 'Black-and-white']",,,
9997961,Contigo y aquí,,,1974.0,Contigo y aquí,,1974.0,,,,['Spanish'],['Argentina'],"['Musical', 'Drama', 'Comedy']",,,
2345652,,,,,City of the Dead,,1960.0,,,76.0,['English'],['United Kingdom'],"['Horror', 'Supernatural']",,,
