In [1]:
import pandas as pd
from functools import reduce
from sqlalchemy import create_engine
import psycopg2 

In [2]:
# import and transform economic indicators

sainc_raw = pd.read_csv(r'SAINC_2000_2020.csv')
economics = pd.melt(sainc_raw, id_vars=['GeoFIPS','GeoName','Region','LineCode','Description','Unit'], var_name='Year', value_name='Metric', ignore_index=True)
economics.head()


Unnamed: 0,GeoFIPS,GeoName,Region,LineCode,Description,Unit,Year,Metric
0,"""00000""",United States,,10,Personal income (millions of dollars),Millions of dollars,2000,8654561.0
1,"""00000""",United States,,11,Nonfarm personal income 1/,Millions of dollars,2000,8603301.0
2,"""00000""",United States,,12,Farm income 2/,Millions of dollars,2000,51260.0
3,"""00000""",United States,,20,Population (persons) 3/,Number of persons,2000,282162411.0
4,"""00000""",United States,,30,Per capita personal income (dollars) 4/,Dollars,2000,30672.0


In [3]:
# import and transform yearly inflation factors

inflation = pd.read_csv(r'InflationTable.csv')
inflation = inflation.drop('Month', axis=1)
inflation = inflation.groupby(['Year']).mean()
inflation


Unnamed: 0_level_0,Inflation_Factor
Year,Unnamed: 1_level_1
2000,0.642856
2001,0.66102
2002,0.671513
2003,0.686767
2004,0.705133
2005,0.729019
2006,0.752543
2007,0.774049
2008,0.803586
2009,0.800904


In [4]:
# import movie data

movies = pd.read_csv(r'tmdb_api_export_1996_2021.csv')
movies.head()

Unnamed: 0.1,Unnamed: 0,Film,Revenue,Year
0,0,Independence Day,817400891,1996
1,1,Twister,494471524,1996
2,2,Mission: Impossible,457731198,1996
3,3,The Rock,335062621,1996
4,4,The Hunchback of Notre Dame,325338851,1996


In [5]:
# join movies and inflation factor dataframes

dfs  = [inflation, movies]
df = reduce(lambda left,right: pd.merge(left,right,on='Year'), dfs)

In [6]:
# adjust revenue for inflation

df['Adjusted_Revenue'] = df['Inflation_Factor'] * df['Revenue']
df

Unnamed: 0.1,Year,Inflation_Factor,Unnamed: 0,Film,Revenue,Adjusted_Revenue
0,2000,0.642856,400,Mission: Impossible II,546388105,3.512487e+08
1,2000,0.642856,401,Gladiator,465361176,2.991600e+08
2,2000,0.642856,402,Cast Away,429632142,2.761914e+08
3,2000,0.642856,403,What Women Want,374111707,2.404998e+08
4,2000,0.642856,404,Dinosaur,354248063,2.277304e+08
...,...,...,...,...,...,...
2195,2021,1.000000,2595,Chernobyl: Abyss,5370393,5.370393e+06
2196,2021,1.000000,2596,Titane,5115725,5.115725e+06
2197,2021,1.000000,2597,Family Swap,4849622,4.849622e+06
2198,2021,1.000000,2598,Qismat 2,4700000,4.700000e+06


In [7]:
# combine all economic indicator data with movies data

economics['Year'] = economics['Year'].astype(int)
merged  = [df, economics]
final_table = reduce(lambda left,right: pd.merge(left,right,on='Year'), merged)

In [8]:
# adjust economic indicators for inflation

final_table['Adjusted_Metric'] = final_table['Metric'] * final_table['Inflation_Factor']
final_table.head()

Unnamed: 0.1,Year,Inflation_Factor,Unnamed: 0,Film,Revenue,Adjusted_Revenue,GeoFIPS,GeoName,Region,LineCode,Description,Unit,Metric,Adjusted_Metric
0,2000,0.642856,400,Mission: Impossible II,546388105,351248700.0,"""00000""",United States,,10,Personal income (millions of dollars),Millions of dollars,8654561.0,5563633.0
1,2000,0.642856,400,Mission: Impossible II,546388105,351248700.0,"""00000""",United States,,11,Nonfarm personal income 1/,Millions of dollars,8603301.0,5530680.0
2,2000,0.642856,400,Mission: Impossible II,546388105,351248700.0,"""00000""",United States,,12,Farm income 2/,Millions of dollars,51260.0,32952.78
3,2000,0.642856,400,Mission: Impossible II,546388105,351248700.0,"""00000""",United States,,20,Population (persons) 3/,Number of persons,282162411.0,181389700.0
4,2000,0.642856,400,Mission: Impossible II,546388105,351248700.0,"""00000""",United States,,30,Per capita personal income (dollars) 4/,Dollars,30672.0,19717.67


In [9]:
# connect to postgres

print('Connecting to the PostgreSQL database...')
conn = psycopg2.connect(
    host="localhost",
    port='5432',
    database="postgres",
    user="postgres",
    password="123")

Connecting to the PostgreSQL database...


In [13]:
#create postgres table from pandas dataframe

engine = create_engine('postgresql://postgres:123@localhost:5432/postgres')
final_table.to_sql('finaltable', engine)

In [14]:
# query table

cur = conn.cursor()

query1 = 'SELECT * from finaltable LIMIT 10'
cur.execute(query1)
print("The number of rows: ", cur.rowcount)

row = cur.fetchone()
while row is not None:
    print("Year:", row[0], ", Film:", row[1], ", Adjusted_Revenue:", row[2], ", Description:", row[3], ", Adjusted_Metric:", row[4])
    row = cur.fetchone()   

The number of rows:  10
Year: 0 , Film: 2000 , Adjusted_Revenue: 0.6428556214166666 , Description: 400 , Adjusted_Metric: Mission: Impossible II
Year: 1 , Film: 2000 , Adjusted_Revenue: 0.6428556214166666 , Description: 400 , Adjusted_Metric: Mission: Impossible II
Year: 2 , Film: 2000 , Adjusted_Revenue: 0.6428556214166666 , Description: 400 , Adjusted_Metric: Mission: Impossible II
Year: 3 , Film: 2000 , Adjusted_Revenue: 0.6428556214166666 , Description: 400 , Adjusted_Metric: Mission: Impossible II
Year: 4 , Film: 2000 , Adjusted_Revenue: 0.6428556214166666 , Description: 400 , Adjusted_Metric: Mission: Impossible II
Year: 5 , Film: 2000 , Adjusted_Revenue: 0.6428556214166666 , Description: 400 , Adjusted_Metric: Mission: Impossible II
Year: 6 , Film: 2000 , Adjusted_Revenue: 0.6428556214166666 , Description: 400 , Adjusted_Metric: Mission: Impossible II
Year: 7 , Film: 2000 , Adjusted_Revenue: 0.6428556214166666 , Description: 400 , Adjusted_Metric: Mission: Impossible II
Year: 8 