# Breaking Bad Notebook
***
 **General Analysis and Visualization Creation**

### Importation Statements

In [1]:
import pandas as pd
import altair as alt

### Data Aggregation and Cleaning

In [2]:
#Display all columns
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#Read CSV
#https://cosaari.github.io/breaking-bad.csv
df = pd.read_csv('breaking-bad.csv', error_bad_lines=False)
#Name index
df.index.name = 'index'
df['index'] = df.index.values
#Fill empty values
df.fillna(0,inplace=True)
#Print data
df

Unnamed: 0_level_0,season,episode,title,year,votes,rating,us-viewers,index
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,1,1,Pilot,2008,23970,9.0,1.41,0
1,1,2,Cat's in the Bag...,2008,17267,8.7,1.49,1
2,1,3,...And the Bag's in the River,2008,16719,8.8,1.08,2
3,1,4,Cancer Man,2008,16211,8.3,1.09,3
4,1,5,Gray Matter,2008,15896,8.4,0.97,4
5,1,6,Crazy Handful of Nothin',2008,18447,9.3,1.07,5
6,1,7,A No-Rough-Stuff-Type Deal,2008,16208,8.9,1.5,6
7,2,1,Seven Thirty-Seven,2009,14506,8.7,1.66,7
8,2,2,Grilled,2009,16918,9.3,1.6,8
9,2,3,Bit by a Dead Bee,2009,14062,8.4,1.13,9


## Visualizations

In [18]:
x = alt.Chart(df).mark_circle(color='red').encode(
    x='index',
    y='us-viewers',
    color= alt.Color('season', scale=alt.Scale(scheme='rainbow')),
    tooltip = ["season","episode","year"]
).properties(
     title="Number of Viewers per Episode",
     width=350,
     height=350
)

x.encoding.y.title = 'US Viewers (Millions)'
x.encoding.x.title = 'Episodes'
x.save('bbViews.html')
x

In [19]:
y1 = alt.Chart(df).mark_bar(color='red').encode(
    x='index',
    y='votes',
    color= alt.Color('season', scale=alt.Scale(scheme='rainbow')),
    tooltip = ["season","episode","year"]
).properties(
    title='Number of Votes per Episode',
     width=350,
     height=350
)

y1.encoding.y.title = 'Votes'
y1.encoding.x.title = 'Episodes'
y1.save('bbVotes.html')
y1

In [8]:
season1Votes = []
season2Votes = []
season3Votes = []
season4Votes = []
season5Votes = []
for index,row in df.iterrows():
    if row['season'] == 1:
        season1Votes.append(row['votes'])
    if row['season'] == 2:
        season2Votes.append(row['votes'])
    if row['season'] == 3:
        season3Votes.append(row['votes'])
    if row['season'] == 4:
        season4Votes.append(row['votes'])
    if row['season'] == 5:
        season5Votes.append(row['votes'])
        

### Statsistics

In [9]:
print("Number of Episodes per Season")
print("S1:\t"+str(len(season1Votes)))
print("S2:\t"+str(len(season2Votes)))
print("S3:\t"+str(len(season3Votes)))
print("S4:\t"+str(len(season4Votes)))
print("S5:\t"+str(len(season5Votes)))

Number of Episodes per Season
S1:	7
S2:	13
S3:	13
S4:	13
S5:	16


In [10]:
print("Average Number of Votes by Season")
print("-"*25)
print("Season 1:\t"+str(sum(season1Votes) // len(season1Votes)))
print("Season 2:\t"+str(sum(season2Votes) // len(season2Votes)))
print("Season 3:\t"+str(sum(season3Votes) // len(season3Votes)))
print("Season 4:\t"+str(sum(season4Votes) // len(season4Votes)))
print("Season 5:\t"+str(sum(season5Votes) // len(season5Votes)))

Average Number of Votes by Season
-------------------------
Season 1:	17816
Season 2:	14844
Season 3:	15239
Season 4:	17099
Season 5:	30890


In [11]:
print("Percentage Increase in Votes per Season")
print("-"*25)
percInc12 = (((sum(season2Votes) - sum(season1Votes)) / sum(season1Votes)) *100)
percInc23 = (((sum(season3Votes) - sum(season2Votes)) / sum(season2Votes)) *100)
percInc34 = (sum(season4Votes) - sum(season3Votes)) / sum(season3Votes) *100
percInc45 = (sum(season5Votes) - sum(season4Votes)) / sum(season4Votes) *100

print("Season 1 -> Season 2:\t% "+ str(round(percInc12,2)))
print("Season 2 -> Season 3:\t% "+ str(round(percInc23,2)))
print("Season 3 -> Season 4:\t% "+ str(round(percInc34,2)))
print("Season 4 -> Season 5:\t% "+ str(round(percInc45,2)))

Percentage Increase in Votes per Season
-------------------------
Season 1 -> Season 2:	% 54.73
Season 2 -> Season 3:	% 2.66
Season 3 -> Season 4:	% 12.21
Season 4 -> Season 5:	% 122.34


In [12]:
season1US = []
season2US = []
season3US = []
season4US = []
season5US = []
for index,row in df.iterrows():
    if row['season'] == 1:
        season1US.append(row['us-viewers'])
    if row['season'] == 2:
        season2US.append(row['us-viewers'])
    if row['season'] == 3:
        season3US.append(row['us-viewers'])
    if row['season'] == 4:
        season4US.append(row['us-viewers'])
    if row['season'] == 5:
        season5US.append(row['us-viewers'])
        

In [13]:
print("Average Number of US Viewers by Season")
print("-"*25)
print("Season 1:\t"+str(round(sum(season1US) / len(season1US),2))+" Million")
print("Season 2:\t"+str(round(sum(season2US) / len(season2US), 2))+" Million")
print("Season 3:\t"+str(round(sum(season3US) / len(season3US), 2))+" Million")
print("Season 4:\t"+str(round(sum(season4US) / len(season4US), 2))+" Million")
print("Season 5:\t"+str(round(sum(season5US) / len(season5US), 2))+" Million")

Average Number of US Viewers by Season
-------------------------
Season 1:	1.23 Million
Season 2:	0.83 Million
Season 3:	1.52 Million
Season 4:	1.87 Million
Season 5:	4.32 Million


In [14]:
print("Percentage Increase in US Viewers per Season")
print("-"*25)
percInc12U = (((sum(season2US) - sum(season1US)) / sum(season1US)) *100)
percInc23U = (((sum(season3US) - sum(season2US)) / sum(season2US)) *100)
percInc34U = (sum(season4US) - sum(season3US)) / sum(season3US) *100
percInc45U = (sum(season5US) - sum(season4US)) / sum(season4US) *100

print("Season 1 -> Season 2:\t% "+ str(round(percInc12U,2)))
print("Season 2 -> Season 3:\t% "+ str(round(percInc23U,2)))
print("Season 3 -> Season 4:\t% "+ str(round(percInc34U,2)))
print("Season 4 -> Season 5:\t% "+ str(round(percInc45U,2)))

Percentage Increase in US Viewers per Season
-------------------------
Season 1 -> Season 2:	% 25.9
Season 2 -> Season 3:	% 81.92
Season 3 -> Season 4:	% 23.07
Season 4 -> Season 5:	% 184.51
