# 1. Shared Indexes
Pandas DataFrames share a single row index across all of their columns. By default, this is an integer index. Pandas enforces this shared row index by raising an error if we read in a CSV file whose columns contain different numbers of elements.

In [2]:
import pandas as pd

# Read the score-comparison dataset into a DataFrame.
fandango = pd.read_csv("fandango_score_comparison2.csv")

# Preview the first two rows.
print(fandango.head(n=2))
# Show the row index object shared by every column ...
print(fandango.index)
# ... and the first label stored in that index.
print(fandango.index.values[0])

   Unnamed: 0                            FILM  RottenTomatoes  \
0         NaN  Avengers: Age of Ultron (2015)              74   
1         NaN               Cinderella (2015)              85   

   RottenTomatoes_User  Metacritic  Metacritic_User  IMDB  Fandango_Stars  \
0                   86          66              7.1   7.8             5.0   
1                   80          67              7.5   7.1             5.0   

   Fandango_Ratingvalue  RT_norm         ...           IMDB_norm  \
0                   4.5     3.70         ...                3.90   
1                   4.5     4.25         ...                3.55   

   RT_norm_round  RT_user_norm_round  Metacritic_norm_round  \
0            3.5                 4.5                    3.5   
1            4.5                 4.0                    3.5   

   Metacritic_user_norm_round  IMDB_norm_round  Metacritic_user_vote_count  \
0                         3.5              4.0                        1330   
1                    

# 2: Using Integer Indexes To Select Rows

In [None]:
# Rows 0 through 4 (the end of the slice is exclusive)
fandango[:5]
# Row 140 through the last row; as the cell's final expression,
# this is the result the notebook displays
fandango[140:]

To select an individual row, however, we'll need to use the `iloc[]` indexer, which selects rows by integer position. It accepts the following objects for selection:

* An integer
* A list of integers
* A slice object
* A Boolean array

In [None]:
# A single row, at position 50
fandango.iloc[50]
# Two rows, at positions 45 and 90, chosen with a list of positions
fandango.iloc[[45, 90]]

# First and last rows together; position -1 counts back from the end
first_last = fandango.iloc[[0, -1]]

# 3. Using custom indexes

In [7]:
# Make the "FILM" column the row index; drop=False keeps FILM available
# as a regular column as well.
fandango_films = fandango.set_index("FILM", drop=False)
print(fandango_films.head(10))

                                Unnamed: 0                            FILM  \
FILM                                                                         
Avengers: Age of Ultron (2015)         NaN  Avengers: Age of Ultron (2015)   
Cinderella (2015)                      NaN               Cinderella (2015)   
Ant-Man (2015)                         NaN                  Ant-Man (2015)   
Do You Believe? (2015)                 NaN          Do You Believe? (2015)   
Hot Tub Time Machine 2 (2015)          NaN   Hot Tub Time Machine 2 (2015)   
The Water Diviner (2015)               NaN        The Water Diviner (2015)   
Irrational Man (2015)                  NaN           Irrational Man (2015)   
Top Five (2014)                        NaN                 Top Five (2014)   
Shaun the Sheep Movie (2015)           NaN    Shaun the Sheep Movie (2015)   
Love & Mercy (2015)                    NaN             Love & Mercy (2015)   

                                RottenTomatoes  RottenTomatoes_

# 4: Using A Custom Index For Selection

In [9]:
# Label-based slicing: plain brackets and loc[] both accept a label range
fandango_films["Avengers: Age of Ultron (2015)":"Hot Tub Time Machine 2 (2015)"]
fandango_films.loc["Avengers: Age of Ultron (2015)":"Hot Tub Time Machine 2 (2015)"]

# One movie, looked up by its index label
fandango_films.loc['Kumiko, The Treasure Hunter (2015)']

# Several movies at once, looked up with a list of labels
movies = [
    'Kumiko, The Treasure Hunter (2015)',
    'Do You Believe? (2015)',
    'Ant-Man (2015)',
]
fandango_films.loc[movies]

# Same list-of-labels lookup, this time stored and printed
movie_list = [
    "The Lazarus Effect (2015)",
    "Gett: The Trial of Viviane Amsalem (2015)",
    "Mr. Holmes (2015)",
]
best_movies_ever = fandango_films.loc[movie_list]
print(best_movies_ever)

                                           Unnamed: 0  \
FILM                                                    
The Lazarus Effect (2015)                         NaN   
Gett: The Trial of Viviane Amsalem (2015)         NaN   
Mr. Holmes (2015)                                 NaN   

                                                                                FILM  \
FILM                                                                                   
The Lazarus Effect (2015)                                  The Lazarus Effect (2015)   
Gett: The Trial of Viviane Amsalem (2015)  Gett: The Trial of Viviane Amsalem (2015)   
Mr. Holmes (2015)                                                  Mr. Holmes (2015)   

                                           RottenTomatoes  \
FILM                                                        
The Lazarus Effect (2015)                              14   
Gett: The Trial of Viviane Amsalem (2015)             100   
Mr. Holmes (2015)            

# 5: Apply() Logic Over The Columns In A Dataframe

In [10]:
import numpy as np

# dtypes is a Series: column names form its index, data types its values
types = fandango_films.dtypes

# keep only the float64 entries; the index of what remains holds the
# names of the float columns
float_columns = types[types.values == 'float64'].index

# select just those columns from the DataFrame
float_df = fandango_films[float_columns]

# apply() hands each column to the function as a Series
deviations = float_df.apply(lambda column: np.std(column))

print(deviations)

Unnamed: 0                         NaN
Metacritic_User               1.505529
IMDB                          0.955447
Fandango_Stars                0.538532
Fandango_Ratingvalue          0.501106
RT_norm                       1.503265
RT_user_norm                  0.997787
Metacritic_norm               0.972522
Metacritic_user_nom           0.752765
IMDB_norm                     0.477723
RT_norm_round                 1.509404
RT_user_norm_round            1.003559
Metacritic_norm_round         0.987561
Metacritic_user_norm_round    0.785412
IMDB_norm_round               0.501043
Fandango_Difference           0.152141
dtype: float64


# 6: Apply() Logic Over Columns: Practice

In [11]:
# Double every value; apply() passes one column (a Series) at a time
double_df = float_df.apply(lambda column: column * 2)
print(double_df.head(n=1))

# Halve every value the same way
halved_df = float_df.apply(lambda column: column / 2)
print(halved_df.head(n=1))

                                Unnamed: 0  Metacritic_User  IMDB  \
FILM                                                                
Avengers: Age of Ultron (2015)         NaN             14.2  15.6   

                                Fandango_Stars  Fandango_Ratingvalue  RT_norm  \
FILM                                                                            
Avengers: Age of Ultron (2015)            10.0                   9.0      7.4   

                                RT_user_norm  Metacritic_norm  \
FILM                                                            
Avengers: Age of Ultron (2015)           8.6              6.6   

                                Metacritic_user_nom  IMDB_norm  RT_norm_round  \
FILM                                                                            
Avengers: Age of Ultron (2015)                  7.1        7.8            7.0   

                                RT_user_norm_round  Metacritic_norm_round  \
FILM                           

# 7: Apply() Over Dataframe Rows

In [12]:
# Narrow the frame to the two columns being compared.
# 'Metacritic_user_nom' is spelled exactly as the column appears in the data.
rt_mt_user = float_df[['RT_user_norm', 'Metacritic_user_nom']]

# axis=1 applies the function across each row instead of down each column
rt_mt_deviations = rt_mt_user.apply(lambda row: np.std(row), axis=1)
print(rt_mt_deviations.head(5))

# Row-wise mean of the same two columns
rt_mt_means = rt_mt_user.apply(np.mean, axis=1)
print(rt_mt_means.head(5))

FILM
Avengers: Age of Ultron (2015)    0.375
Cinderella (2015)                 0.125
Ant-Man (2015)                    0.225
Do You Believe? (2015)            0.925
Hot Tub Time Machine 2 (2015)     0.150
dtype: float64
FILM
Avengers: Age of Ultron (2015)    3.925
Cinderella (2015)                 3.875
Ant-Man (2015)                    4.275
Do You Believe? (2015)            3.275
Hot Tub Time Machine 2 (2015)     1.550
dtype: float64
