# Budget Optimization: Tests 1-10


## Loading Data and Converting Data Types

In [1]:
import pandas as pd

In [2]:
# Read the raw movie budget table from disk; the path is relative to the
# notebook's working directory.
BUDGETS_CSV = 'Data/tn.movie_budgets.csv'
budgetsdf = pd.read_csv(BUDGETS_CSV)

In [3]:
# Work on an independent (deep) copy so the frame loaded from the CSV is left
# unmodified by the cleaning steps that follow.
df = budgetsdf.copy(deep=True)

In [4]:
# Strip the currency formatting ('$' and ',' thousands separators) from the
# three money columns so they can be converted to numbers afterwards.
# The vectorized .str accessor is used instead of calling x.replace() on each
# element: it passes missing values through as NaN rather than raising
# AttributeError, and regex=False makes '$' a literal character rather than a
# regular-expression anchor.
for col in ['worldwide_gross', 'domestic_gross', 'production_budget']:
    df[col] = (
        df[col]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
    )

In [5]:
# Convert each money column to float so it can be summed and divided.
for col in ('worldwide_gross', 'domestic_gross', 'production_budget'):
    df[col] = df[col].astype(float)

## Calculating Gross to Budget Ratio and Creating Budget Categories

In [6]:
# Gross-to-budget ratio: worldwide gross divided by production budget, row by
# row (a value above 1 means the film grossed more than its budget).
df['gtbratio'] = df['worldwide_gross'].div(df['production_budget'])

In [7]:
# Source = https://www.studiobinder.com/blog/production-budget/#:~:text=Film%20Budget%20Examples,-Low%2Dend%20movie&text=There's%20some%20conjecture%20as%20to,(2007)%20%E2%80%93%20Budget%3A%20%247%2C000

def _budget_tier(budget):
    """Label a production budget as 'low', 'mid' or 'high'.

    Budgets under 5,000,000 are 'low', budgets under 50,000,000 are 'mid',
    and anything that fails both comparisons (including NaN) is 'high'.
    """
    if budget < 5000000:
        return 'low'
    if budget < 50000000:
        return 'mid'
    return 'high'


df['budgetcat'] = [_budget_tier(x) for x in df['production_budget']]

In [8]:
# One sub-frame per budget category; these are the pools the test cells
# below sample from.
lowdf, middf, highdf = (
    df.loc[df['budgetcat'] == tier] for tier in ('low', 'mid', 'high')
)

## Records of Tests 1-10

Test 1

In [50]:
# Test 1, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
# NOTE(review): the output recorded below has columns ('Unnamed: 0', 'net',
# 'ntbratio', 'profitability', 'success') that no visible cell creates, and
# this cell's execution count (50) precedes In [9] — the frame was presumably
# built in a different kernel state; confirm before relying on the record.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
662,63,11-Mar-05,Hostage,75000000.0,34636443.0,77636443.0,1.035153,2636443.0,0.035153,high,-72363557.0,no


In [9]:
# Test 1, mid-budget arm: draw 4 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): sample() gets no random_state, so a re-run will generally not
# reproduce the totals recorded below; n=4 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=4)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  80000000.0
gross:  553227808.0


In [58]:
# Test 1, low-budget arm: draw 45 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): sample() gets no random_state, so a re-run will generally not
# reproduce the totals recorded below; n=45 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=45)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  74285848.0
gross:  229992510.0


Test 2

In [59]:
# Test 2, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
1170,71,9-Aug-02,Blood Work,50000000.0,26199517.0,26199517.0,0.52399,-23800483.0,-0.47601,high,-73800483.0,no


In [60]:
# Test 2, mid-budget arm: draw 3 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so the earlier sample
# is overwritten. sample() gets no random_state, so a re-run will generally not
# reproduce the totals recorded below; n=3 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=3)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  59000000.0
gross:  33087228.0


In [65]:
# Test 2, low-budget arm: draw 30 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so the earlier sample
# is overwritten. sample() gets no random_state, so a re-run will generally not
# reproduce the totals recorded below; n=30 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=30)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  53321173.0
gross:  223802611.0


Test 3

In [66]:
# Test 3, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
703,4,8-Aug-03,S.W.A.T.,70000000.0,116877597.0,207154748.0,2.959354,137154748.0,1.959354,high,67154748.0,yes


In [67]:
# Test 3, mid-budget arm: draw 4 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=4 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=4)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  68000000.0
gross:  128307395.0


In [68]:
# Test 3, low-budget arm: draw 45 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=45 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=45)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  72978000.0
gross:  426209825.0


Test 4

In [69]:
# Test 4, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
12,13,2-Jul-13,The Lone Ranger,275000000.0,89302115.0,260002115.0,0.945462,-14997885.0,-0.054538,high,-289997885.0,no


In [78]:
# Test 4, mid-budget arm: draw 14 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=14 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=14)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  274000000.0
gross:  1237681581.0


In [83]:
# Test 4, low-budget arm: draw 150 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=150 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=150)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  271427000.0
gross:  1224494174.0


Test 5

In [84]:
# Test 5, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
807,8,11-Oct-96,The Long Kiss Goodnight,65000000.0,33447612.0,33447612.0,0.514579,-31552388.0,-0.485421,high,-96552388.0,no


In [87]:
# Test 5, mid-budget arm: draw 4 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=4 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=4)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  71500000.0
gross:  208102990.0


In [89]:
# Test 5, low-budget arm: draw 40 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=40 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=40)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  71083000.0
gross:  503732849.0


Test 6

In [90]:
# Test 6, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
762,63,2-Mar-12,Doctor Seuss' The Lorax,67500000.0,214030500.0,350976753.0,5.199656,283476753.0,4.199656,high,215976753.0,yes


In [96]:
# Test 6, mid-budget arm: draw 4 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=4 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=4)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  61000000.0
gross:  322044719.0


In [99]:
# Test 6, low-budget arm: draw 35 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=35 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=35)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  61574000.0
gross:  524042835.0


Test 7

In [100]:
# Test 7, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
799,100,8-Dec-04,Blade: Trinity,65000000.0,52397389.0,131353165.0,2.020818,66353165.0,1.020818,high,1353165.0,yes


In [103]:
# Test 7, mid-budget arm: draw 3 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=3 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=3)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  70000000.0
gross:  360917807.0


In [105]:
# Test 7, low-budget arm: draw 35 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=35 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=35)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  62125000.0
gross:  145859718.0


Test 8

In [106]:
# Test 8, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
42,43,19-Dec-97,Titanic,200000000.0,659363944.0,2208208000.0,11.041042,2008208000.0,10.041042,high,1808208000.0,yes


In [114]:
# Test 8, mid-budget arm: draw 10 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=10 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=10)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  206500000.0
gross:  785108469.0


In [119]:
# Test 8, low-budget arm: draw 120 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=120 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=120)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  191355000.0
gross:  1183895637.0


Test 9

In [120]:
# Test 9, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
1132,33,23-Nov-05,Syriana,50000000.0,50824620.0,92690959.0,1.853819,42690959.0,0.853819,high,-7309041.0,no


In [123]:
# Test 9, mid-budget arm: draw 3 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=3 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=3)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  48000000.0
gross:  90936479.0


In [125]:
# Test 9, low-budget arm: draw 30 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=30 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=30)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  56522000.0
gross:  263949114.0


Test 10

In [126]:
# Test 10, high-budget arm: display one randomly chosen high-budget film.
# No random_state is passed, so a re-run will generally show a different film.
highdf.sample(n=1)

Unnamed: 0,id,release_date,movie,production_budget,domestic_gross,worldwide_gross,gtbratio,net,ntbratio,budgetcat,profitability,success
937,38,25-Dec-08,The Spirit,60000000.0,19806188.0,39006188.0,0.650103,-20993812.0,-0.349897,high,-80993812.0,no


In [128]:
# Test 10, mid-budget arm: draw 4 random mid-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): midtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=4 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
midtest1 = middf.sample(n=4)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, midtest1[column].sum())

budget:  59600000.0
gross:  112461875.0


In [129]:
# Test 10, low-budget arm: draw 35 random low-budget films and print their
# combined production budget and worldwide gross.
# NOTE(review): lowtest1 is the same name Test 1 assigns, so earlier samples
# are overwritten. sample() gets no random_state, so a re-run will generally
# not reproduce the totals recorded below; n=35 looks hand-picked to bring the
# combined budget near this test's high-budget film — TODO confirm.
lowtest1 = lowdf.sample(n=35)
for label, column in (("budget: ", 'production_budget'), ("gross: ", 'worldwide_gross')):
    print(label, lowtest1[column].sum())

budget:  63729000.0
gross:  278664500.0
