# Working with Fandango Score Comparison (Pandas Internals: Series and Dataframe)

### Columns:
* FILM - Film name
* RottenTomatoes - Average critic score on Rotten Tomatoes
* RottenTomatoes_User - Average user score on Rotten Tomatoes
* RT_norm - Average critic score on Rotten Tomatoes (normalized to a 0 to 5-point system)
* RT_user-norm - Average user score on Rotten Tomatoes (normalized to a 0 to 5-point system)
* Metacritic - Average critic score on Metacritic
* Metacritic_User - Average user score on Metacritic

In [1]:
import pandas as pd
import numpy as np

fandango = pd.read_csv("data/fandango_score_comparison.csv")

print (fandango.head(2))

                             FILM  RottenTomatoes  RottenTomatoes_User  \
0  Avengers: Age of Ultron (2015)              74                   86   
1               Cinderella (2015)              85                   80   

   Metacritic  Metacritic_User  IMDB  Fandango_Stars  Fandango_Ratingvalue  \
0          66              7.1   7.8             5.0                   4.5   
1          67              7.5   7.1             5.0                   4.5   

   RT_norm  RT_user_norm         ...           IMDB_norm  RT_norm_round  \
0     3.70           4.3         ...                3.90            3.5   
1     4.25           4.0         ...                3.55            4.5   

   RT_user_norm_round  Metacritic_norm_round  Metacritic_user_norm_round  \
0                 4.5                    3.5                         3.5   
1                 4.0                    3.5                         4.0   

   IMDB_norm_round  Metacritic_user_vote_count  IMDB_user_vote_count  \
0              

In [2]:
series_film = fandango["FILM"]
print(series_film.loc[0:4])

series_rt = fandango["RottenTomatoes"]
print(series_rt.loc[0:4])

0    Avengers: Age of Ultron (2015)
1                 Cinderella (2015)
2                    Ant-Man (2015)
3            Do You Believe? (2015)
4     Hot Tub Time Machine 2 (2015)
Name: FILM, dtype: object
0    74
1    85
2    80
3    18
4    14
Name: RottenTomatoes, dtype: int64


In [3]:
film_names = list(fandango["FILM"])
series_custom = pd.Series(data = list(series_rt), index = film_names)

In [4]:
fiveten = series_custom.iloc[5:11]
print(fiveten)

The Water Diviner (2015)             63
Irrational Man (2015)                42
Top Five (2014)                      86
Shaun the Sheep Movie (2015)         99
Love & Mercy (2015)                  89
Far From The Madding Crowd (2015)    84
dtype: int64


In [5]:
original_index = series_custom.index

new_index = sorted(original_index)

sorted_by_index = series_custom.reindex(new_index)

In [6]:
print(sorted_by_index.iloc[0:5])

'71 (2015)                    97
5 Flights Up (2015)           52
A Little Chaos (2015)         40
A Most Violent Year (2014)    90
About Elly (2015)             97
dtype: int64


In [7]:
print(sorted_by_index.loc['Avengers: Age of Ultron (2015)'])

74


In [8]:
sc2 = series_custom.sort_index()
sc3 = series_custom.sort_values()

print(sc2.iloc[0:10])
print(sc3.iloc[0:10])

'71 (2015)                    97
5 Flights Up (2015)           52
A Little Chaos (2015)         40
A Most Violent Year (2014)    90
About Elly (2015)             97
Aloha (2015)                  19
American Sniper (2015)        72
American Ultra (2015)         46
Amy (2015)                    97
Annie (2014)                  27
dtype: int64
Paul Blart: Mall Cop 2 (2015)     5
Hitman: Agent 47 (2015)           7
Hot Pursuit (2015)                8
Fantastic Four (2015)             9
Taken 3 (2015)                    9
The Boy Next Door (2015)         10
The Loft (2015)                  11
Unfinished Business (2015)       11
Mortdecai (2015)                 12
Seventh Son (2015)               12
dtype: int64


In [9]:
new_df = fandango.loc[:10,["RottenTomatoes","RottenTomatoes_User"]]

In [10]:
print(new_df)

    RottenTomatoes  RottenTomatoes_User
0               74                   86
1               85                   80
2               80                   90
3               18                   84
4               14                   28
5               63                   62
6               42                   53
7               86                   64
8               99                   82
9               89                   87
10              84                   77


In [11]:
rt_series = pd.Series(fandango["RottenTomatoes"].values, index = fandango["FILM"])

In [12]:
best_movies = pd.Series(fandango["RottenTomatoes"].values, index = fandango["FILM"]).sort_values(ascending = False, inplace = False)

In [13]:
print(best_movies)

FILM
Gett: The Trial of Viviane Amsalem (2015)      100
Seymour: An Introduction (2015)                100
Selma (2014)                                    99
Phoenix (2015)                                  99
Song of the Sea (2014)                          99
Leviathan (2014)                                99
Shaun the Sheep Movie (2015)                    99
Timbuktu (2015)                                 99
Mr. Turner (2014)                               98
Paddington (2015)                               98
Inside Out (2015)                               98
Mad Max: Fury Road (2015)                       97
Amy (2015)                                      97
Two Days, One Night (2014)                      97
About Elly (2015)                               97
'71 (2015)                                      97
What We Do in the Shadows (2015)                96
The Salt of the Earth (2015)                    96
Red Army (2015)                                 96
It Follows (2015)         

In [14]:
print(fandango.index)

RangeIndex(start=0, stop=146, step=1)


In [15]:
# select the first and last rows

first_last = fandango.iloc[[0,-1]]

In [16]:
print(first_last.iloc[:,:3])

                                   FILM  RottenTomatoes  RottenTomatoes_User
0        Avengers: Age of Ultron (2015)              74                   86
145  Kumiko, The Treasure Hunter (2015)              87                   63


In [17]:
# assign FILM as the index

fandango_films = fandango.set_index("FILM", drop = False, inplace = False)

In [18]:
print(fandango_films.index)

Index(['Avengers: Age of Ultron (2015)', 'Cinderella (2015)', 'Ant-Man (2015)',
       'Do You Believe? (2015)', 'Hot Tub Time Machine 2 (2015)',
       'The Water Diviner (2015)', 'Irrational Man (2015)', 'Top Five (2014)',
       'Shaun the Sheep Movie (2015)', 'Love & Mercy (2015)',
       ...
       'The Woman In Black 2 Angel of Death (2015)', 'Danny Collins (2015)',
       'Spare Parts (2015)', 'Serena (2015)', 'Inside Out (2015)',
       'Mr. Holmes (2015)', ''71 (2015)', 'Two Days, One Night (2014)',
       'Gett: The Trial of Viviane Amsalem (2015)',
       'Kumiko, The Treasure Hunter (2015)'],
      dtype='object', name='FILM', length=146)


In [19]:
best_movies_ever = fandango_films = fandango_films.loc[["The Lazarus Effect (2015)", "Gett: The Trial of Viviane Amsalem (2015)", "Mr. Holmes (2015)"]]

In [52]:
# make a df with all the float types, then multiply each by two

types = fandango.dtypes
float_columns = types[types.values == "float64"].index
float_df = fandango[float_columns]
float_df_x_2 = float_df.apply(lambda x: x*2)
float_df_std = float_df.apply(lambda x: np.std(x))
print (float_df.iloc[0,])
print (float_df.iloc[:1,].apply(lambda x: np.sum(x), axis = 0)) # will sum each row in the column, therefore with only one row will return unchanged series
print (float_df.iloc[:1,].apply(lambda x: np.sum(x), axis = 1)) # will sum each column in a row, therefore when run on single row returns a single number 

print (float_df_x_2.iloc[:2,:1])
print (float_df_std)

Metacritic_User               7.10
IMDB                          7.80
Fandango_Stars                5.00
Fandango_Ratingvalue          4.50
RT_norm                       3.70
RT_user_norm                  4.30
Metacritic_norm               3.30
Metacritic_user_nom           3.55
IMDB_norm                     3.90
RT_norm_round                 3.50
RT_user_norm_round            4.50
Metacritic_norm_round         3.50
Metacritic_user_norm_round    3.50
IMDB_norm_round               4.00
Fandango_Difference           0.50
Name: 0, dtype: float64
Metacritic_User               7.10
IMDB                          7.80
Fandango_Stars                5.00
Fandango_Ratingvalue          4.50
RT_norm                       3.70
RT_user_norm                  4.30
Metacritic_norm               3.30
Metacritic_user_nom           3.55
IMDB_norm                     3.90
RT_norm_round                 3.50
RT_user_norm_round            4.50
Metacritic_norm_round         3.50
Metacritic_user_norm_round    3

In [56]:
# practicing starting again
fandango = pd.read_csv("data/fandango_score_comparison.csv")

types = fandango.dtypes
float_columns = types[types.values == "float64"].index

float_df = fandango[float_columns]
halved_df = float_df.apply(lambda x: x/2)

print(float_df.iloc[1])
print(halved_df.iloc[0])

Metacritic_User               7.50
IMDB                          7.10
Fandango_Stars                5.00
Fandango_Ratingvalue          4.50
RT_norm                       4.25
RT_user_norm                  4.00
Metacritic_norm               3.35
Metacritic_user_nom           3.75
IMDB_norm                     3.55
RT_norm_round                 4.50
RT_user_norm_round            4.00
Metacritic_norm_round         3.50
Metacritic_user_norm_round    4.00
IMDB_norm_round               3.50
Fandango_Difference           0.50
Name: 1, dtype: float64
Metacritic_User               3.550
IMDB                          3.900
Fandango_Stars                2.500
Fandango_Ratingvalue          2.250
RT_norm                       1.850
RT_user_norm                  2.150
Metacritic_norm               1.650
Metacritic_user_nom           1.775
IMDB_norm                     1.950
RT_norm_round                 1.750
RT_user_norm_round            2.250
Metacritic_norm_round         1.750
Metacritic_user_nor

In [64]:
# get avg of each movies normalized ratings from RT_user_norm and Metacritic_user_norm

rt_mt_means = fandango_films[["RT_user_norm","Metacritic_user_nom"]].apply(lambda x: np.mean(x), axis=1)
print(rt_mt_means[:5])
print(rt_mt_means)

FILM
The Lazarus Effect (2015)                    1.800
Gett: The Trial of Viviane Amsalem (2015)    3.850
Mr. Holmes (2015)                            3.925
dtype: float64
FILM
The Lazarus Effect (2015)                    1.800
Gett: The Trial of Viviane Amsalem (2015)    3.850
Mr. Holmes (2015)                            3.925
dtype: float64
