# Series Level

In [16]:
import pandas as pd

# Read the Fandango score-comparison CSV (the path is relative to the current
# working directory) and preview the first two rows.
csv_path = 'fandango_score_comparison.csv'
fandango = pd.read_csv(csv_path)
fandango.head(n=2)

Unnamed: 0,FILM,RottenTomatoes,RottenTomatoes_User,Metacritic,Metacritic_User,IMDB,Fandango_Stars,Fandango_Ratingvalue,RT_norm,RT_user_norm,...,IMDB_norm,RT_norm_round,RT_user_norm_round,Metacritic_norm_round,Metacritic_user_norm_round,IMDB_norm_round,Metacritic_user_vote_count,IMDB_user_vote_count,Fandango_votes,Fandango_Difference
0,Avengers: Age of Ultron (2015),74,86,66,7.1,7.8,5.0,4.5,3.7,4.3,...,3.9,3.5,4.5,3.5,3.5,4.0,1330,271107,14846,0.5
1,Cinderella (2015),85,80,67,7.5,7.1,5.0,4.5,4.25,4.0,...,3.55,4.5,4.0,3.5,4.0,3.5,249,65709,12640,0.5


### Create a customized series

In [17]:
from pandas import Series
# Build a Series of Rotten Tomatoes scores that is labelled by film title
# instead of by the default integer positions.
series_film = fandango['FILM']
series_rt = fandango['RottenTomatoes']

# `.values` hands back the underlying arrays without the columns' own index.
film_names = series_film.values
rt_scores = series_rt.values

# Remember: Series(data, index) pairs every value with a label.
series_custom = Series(rt_scores, index=film_names)

# Selecting with a list of labels returns a Series holding just those entries.
series_custom.loc[['Minions (2015)', 'Leviathan (2014)']]

Minions (2015)      54
Leviathan (2014)    99
dtype: int64

### Integer Index Preservation

In [18]:
# A custom (string) index does not remove positional access: rows can still be
# selected by integer position. `.iloc` states that positional intent
# explicitly, whereas a bare `series_custom[5:11]` leaves pandas to decide
# between position and label (and scalar integer keys on a labelled Series are
# deprecated in recent pandas releases).
fiveten = series_custom.iloc[5:11]  # positions 5 through 10; the end is exclusive
print(fiveten)

The Water Diviner (2015)             63
Irrational Man (2015)                42
Top Five (2014)                      86
Shaun the Sheep Movie (2015)         99
Love & Mercy (2015)                  89
Far From The Madding Crowd (2015)    84
dtype: int64


### Reindexing

In [19]:
# A custom-labelled Series keeps its rows in their original order, so the
# labels are not sorted. Reindexing with a sorted list of the labels returns
# a new Series whose rows follow that sorted order.
# Remember: `.index`, sorting a list of labels, and `.reindex(index)`.
original_index = series_custom.index
sorted_index = list(original_index)
sorted_index.sort()
sorted_by_index = series_custom.reindex(index=sorted_index)

### Sorting by index/values with .sort_index/.sort_values( )

In [20]:
# sort_index() orders the rows by label; sort_values() orders them by value.
# Both return a new Series and leave `series_custom` untouched.
sc2 = series_custom.sort_index()
sc3 = series_custom.sort_values()

# Show the first ten rows of each ordering.
for ordered in (sc2, sc3):
    print(ordered.head(10))

'71 (2015)                    97
5 Flights Up (2015)           52
A Little Chaos (2015)         40
A Most Violent Year (2014)    90
About Elly (2015)             97
Aloha (2015)                  19
American Sniper (2015)        72
American Ultra (2015)         46
Amy (2015)                    97
Annie (2014)                  27
dtype: int64
Paul Blart: Mall Cop 2 (2015)     5
Hitman: Agent 47 (2015)           7
Hot Pursuit (2015)                8
Fantastic Four (2015)             9
Taken 3 (2015)                    9
The Boy Next Door (2015)         10
The Loft (2015)                  11
Unfinished Business (2015)       11
Mortdecai (2015)                 12
Seventh Son (2015)               12
dtype: int64


### Transforming Columns - Series Alignment

In [21]:
import numpy as np
# NumPy functions accept a Series directly. Only the last expression of a cell
# is displayed, so the three calls below are shown for reference and their
# results are discarded.
np.add(series_custom, series_custom)  # element-wise sum of the two Series
np.sin(series_custom)                 # sine of every value
np.max(series_custom)                 # reduction: a single value, not a Series

# Arithmetic with a scalar is applied to every element (e.g. 74 -> 3.7).
series_normalized = series_custom / 20

# Notes on alignment:
#  - Series operations return a new Series object, aligned along the index.
#  - DataFrame operations align along both the index and the columns.
#  - Two different Series objects can also be compared with each other.
# A comparison yields a boolean Series, used here as a row mask.
series_greater_than_50 = series_custom[series_custom > 50]

rt_critics = Series(fandango['RottenTomatoes'].values, index=fandango['FILM'])
rt_users = Series(fandango['RottenTomatoes_User'].values, index=fandango['FILM'])
# Let pandas pair the two Series by film title rather than stripping the index
# with `.values` and re-attaching it by hand. Both Series share the same FILM
# index, so the result has the same values and the same index.
rt_mean = (rt_critics + rt_users) / 2

# DataFrame Level

### Using Custom Indexes with set_index( drop, inplace )

In [24]:
# Use the FILM column as the row index of a new DataFrame.
# The column is passed by *label* so that `drop=False` is meaningful: it keeps
# FILM available as a regular column as well as the index. (When a Series is
# passed as the key instead, `drop` has no effect because it only applies to
# column labels.)
fandango_films = fandango.set_index('FILM', drop=False)

### Using a Custom Index for Selection

In [25]:
# With film titles as the index, `.loc` with a list of labels returns those
# rows as a DataFrame, in the order the labels are listed.
best_movies_ever = fandango_films.loc[
    [
        "The Lazarus Effect (2015)",
        "Gett: The Trial of Viviane Amsalem (2015)",
        "Mr. Holmes (2015)",
    ]
]

### `apply()` Over the Columns of a DataFrame (the default, `axis=0`)

In [36]:
# ⚠️ DataFrame.apply() iterates over either the columns or the rows of a frame.
# ⚠️ Here (the default) every column is computed on its own.
import numpy as np

# Goal: the standard deviation of each float column.
# `.dtypes` is a Series mapping column name -> data type.
types = fandango_films.dtypes
# Keep only the labels whose dtype is float64; the result is an Index of
# column names.
float_columns = types.index[types.values == 'float64']
# Restrict the frame to those float columns.
float_df = fandango_films.loc[:, float_columns]

# Each call of the lambda receives one column as a Series.
# np.std defaults to ddof=0 (population standard deviation).
deviations = float_df.apply(lambda col: np.std(col))

# ⚠️ Remember the pattern `lambda x: <function of x>`. It also works for
# element-wise transformations, which return a DataFrame of the same shape.
double_df = float_df.apply(lambda col: col * 2)
halved_df = float_df.apply(lambda col: col / 2)

deviations.head(n=3)

Metacritic_User    1.505529
IMDB               0.955447
Fandango_Stars     0.538532
dtype: float64

### `apply()` Over the Rows of a DataFrame with `axis=1`

In [32]:
# ⚠️ Every row is computed on its own: with axis=1 the lambda receives one row
# (as a Series) per call.
# NOTE: 'Metacritic_user_nom' is kept exactly as written. The recorded output
# shows this lookup succeeded, so the label presumably matches the CSV header
# as spelled; verify against the data before "correcting" it.
rt_mt_user = float_df.loc[:, ['RT_user_norm', 'Metacritic_user_nom']]
rt_mt_deviations = rt_mt_user.apply(lambda row: np.std(row), axis=1)
rt_mt_deviations.head(n=3)

FILM
Avengers: Age of Ultron (2015)    0.375
Cinderella (2015)                 0.125
Ant-Man (2015)                    0.225
dtype: float64

In [35]:
# Per-film mean of the two normalized user scores: axis=1 passes one row at a
# time to the lambda.
# NOTE: 'Metacritic_user_nom' is kept exactly as written. The recorded output
# shows this lookup succeeded, so verify against the data before changing it.
wewant = float_df.loc[:, ['RT_user_norm', 'Metacritic_user_nom']]
rt_mt_means = wewant.apply(lambda row: np.mean(row), axis=1)
# Show the first four films.
rt_mt_means.head(4)

FILM
Avengers: Age of Ultron (2015)    3.925
Cinderella (2015)                 3.875
Ant-Man (2015)                    4.275
Do You Believe? (2015)            3.275
dtype: float64