In [5]:
import pandas as pd

In [None]:
# Load the sketch-title table; later cells join it on "sid" (and "eid").
snl = pd.read_csv(filepath_or_buffer='ex-data/snldb/snl_title.csv')
# Preview the first five rows only.
snl.head(5)

In [None]:
# Load the episode table. "aired" is parsed as a date so that the `.dt`
# accessor can be used on it further down.
episodes = pd.read_csv(
    'ex-data/snldb/snl_episode.csv',
    parse_dates=["aired"],
)
episodes.head(5)

In [None]:
# Load the season table; it provides the "sid" and "year" columns used below.
seasons = pd.read_csv(filepath_or_buffer='ex-data/snldb/snl_season.csv')
seasons.head(5)

## Compute the sketches of the season with year 1978

The condition for selecting the rows is a single equality test on the `year` column.

In [None]:
# Select the season row(s) whose year is 1978, then attach every sketch that
# shares the same season id (inner join on "sid").
y1978 = (
    seasons
    .loc[seasons["year"].eq(1978)]
    .merge(snl, on="sid")
)
y1978

## Compute the sketches of the seasons with year 1978-1982

In this case the selection condition is more complex: it is the conjunction of two conditions.
Because `&` has higher precedence than the comparison operators, each comparison must be wrapped in parentheses.

In [None]:
# Seasons whose year lies in the closed interval [1978, 1982];
# `between` includes both endpoints by default.
in_range = seasons["year"].between(1978, 1982)
years = seasons.loc[in_range]
years

In [None]:
# Inner join: every sketch belonging to one of the selected seasons.
years.merge(snl, on="sid")

## Compute the sketches aired in 1978 (consider the table snl_episode.csv)

First we merge the episodes with the snl dataframe, so that we have the air date of each sketch.

In [None]:
# Attach the episode-level columns (including the "aired" date) to every
# sketch. A sketch is matched to its episode by the (season id, episode id)
# pair. The keys are passed as a list because `on` is documented as
# "label or list"; a tuple is also a valid single column label, so it is
# ambiguous to the reader.
# NOTE(review): `all` shadows Python's builtin all(). The name is kept only
# because the following cells refer to it; consider renaming across the
# notebook in one pass.
all = pd.merge(snl, episodes, on=["sid", "eid"])
all.head()

Before extracting the relevant sketches, we add a new column holding the year in which each sketch aired.

In [None]:
# Derive the calendar year of each air date; this relies on "aired" having
# been parsed as a datetime column when the episodes were loaded.
# `assign` builds a new frame instead of mutating, in place, a frame that an
# earlier cell already displayed, so re-running cells out of order cannot
# leave a stale view behind.
# NOTE(review): `all` shadows the builtin all(); kept because other cells
# reference this name.
all = all.assign(year=all["aired"].dt.year)
# Preview only: the full merged frame is too large to be a useful output.
all.head()

Then we extract only the relevant sketches

In [None]:
# Rows of the merged frame whose air-date year is exactly 1978.
all.loc[all["year"].eq(1978)]

## For each season, compute the average rating (consider only the `Top 1000 voters` column)

In [None]:
# Load the ratings table; the cells below use its "sid" and
# "Top 1000 voters" columns.
ratings = pd.read_csv(filepath_or_buffer='ex-data/snldb/snl_rating.csv')
ratings.head(5)

In [None]:
# Mean of the "Top 1000 voters" rating within each season id.
# The result is a Series indexed by "sid".
avg_rating_of_season = (
    ratings
    .groupby("sid")["Top 1000 voters"]
    .agg("mean")
)
avg_rating_of_season

## For each season, compute the difference between the maximum and the minimum ratings

First we extract the max and min rating for each season. Since we will merge the resulting tables by using the `sid` column, the `merge` is simpler if both dataframes retain the `sid` column (that is, it does not become part of the index).

In [None]:
# Lowest "Top 1000 voters" rating per season. as_index=False keeps "sid" as
# an ordinary column so it can be used as a merge key afterwards.
mins = (
    ratings
    .groupby("sid", as_index=False)["Top 1000 voters"]
    .agg("min")
)
mins.head(5)

In [None]:
# Highest "Top 1000 voters" rating per season. as_index=False keeps "sid" as
# an ordinary column so it can be used as a merge key afterwards.
maxs = (
    ratings
    .groupby("sid", as_index=False)["Top 1000 voters"]
    .agg("max")
)
maxs.head(5)

In [None]:
# Put each season's minimum and maximum side by side. Both inputs carry a
# "Top 1000 voters" column, so the suffixes tell the two apart.
merged = mins.merge(
    maxs,
    on='sid',
    suffixes=['_min', '_max'],
)
merged.head(5)

In [None]:
# Spread of the ratings within each season: maximum minus minimum.
merged["diff"] = merged["Top 1000 voters_max"].sub(merged["Top 1000 voters_min"])
# Show only the season id next to its spread.
merged.loc[:, ["sid", "diff"]]