In [1]:
import requests

DATA_URL = (
    "https://github.com/astrojuanlu/ie-mbd-python-data-analysis-i/"
    "raw/main/data/rick-and-morty.json"
)

data = requests.get(DATA_URL).json()
print(type(data), len(data))

<class 'dict'> 24


In [2]:
len(data["_embedded"]["episodes"])

51

In [3]:
season_counts = {}
for episode in data["_embedded"]["episodes"]:
    if episode["season"] not in season_counts:
        season_counts[episode["season"]] = 1
    else:
        season_counts[episode["season"]] += 1

season_counts

{1: 11, 2: 10, 3: 10, 4: 10, 5: 10}

In [4]:
from statistics import mean, stdev

In [5]:
ratings = []
for episode in data["_embedded"]["episodes"]:
    ratings.append(episode["rating"]["average"])

mean(ratings), stdev(ratings)

(8.876470588235295, 0.4654409673821666)

In [6]:
summaries = [
    episode["summary"].replace("<p>", "").replace("</p>", "")
    for episode in data["_embedded"]["episodes"]
]

word_counts = {}
for word in " ".join(summaries).split():
    if word not in word_counts:
        word_counts[word] = 1
    else:
        word_counts[word] += 1

sorted(word_counts.items(), key=lambda pair: pair[1], reverse=True)[:15]

[('a', 42),
 ('to', 37),
 ('Rick', 34),
 ('and', 34),
 ('Morty', 28),
 ('the', 25),
 ('in', 22),
 ('of', 21),
 ('broh.', 14),
 ('on', 13),
 ('with', 12),
 ('Jerry', 10),
 ('his', 9),
 ('an', 8),
 ('their', 8)]

In [7]:
# Craft a custom set of stopwords, or use an external one
# stopwords = {"a", "to", "and", "the", "in", "of", "on", "his", "an", "their", "with", "is", "for", "this", "are", "have", "they", "but"}
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

clean_word_counts = {
    word: count
    for word, count in word_counts.items()
    if word not in ENGLISH_STOP_WORDS
}

sorted(clean_word_counts.items(), key=lambda pair: pair[1], reverse=True)[:15]

[('Rick', 34),
 ('Morty', 28),
 ('broh.', 14),
 ('Jerry', 10),
 ('new', 7),
 ('gets', 5),
 ('family', 5),
 ('Beth', 5),
 ('takes', 4),
 ("It's", 4),
 ('The', 4),
 ("Morty's", 3),
 ('time', 3),
 ('Rick.', 3),
 ('adventure', 3)]

---

Alternative solutions with pandas:

In [8]:
import pandas as pd

In [9]:
df = pd.DataFrame.from_records(data["_embedded"]["episodes"])
df.head()

Unnamed: 0,id,url,name,season,number,type,airdate,airtime,airstamp,runtime,rating,image,summary,_links
0,14308,https://www.tvmaze.com/episodes/14308/rick-and...,Pilot,1,1,regular,2013-12-02,22:30,2013-12-03T03:30:00+00:00,30,{'average': 8.6},{'medium': 'https://static.tvmaze.com/uploads/...,<p>Rick takes Morty to another dimension to ge...,{'self': {'href': 'https://api.tvmaze.com/epis...
1,14309,https://www.tvmaze.com/episodes/14309/rick-and...,Lawnmower Dog,1,2,regular,2013-12-09,22:30,2013-12-10T03:30:00+00:00,30,{'average': 8.9},{'medium': 'https://static.tvmaze.com/uploads/...,"<p>Morty's small, white dog Snuffles gets on t...",{'self': {'href': 'https://api.tvmaze.com/epis...
2,14310,https://www.tvmaze.com/episodes/14310/rick-and...,Anatomy Park,1,3,regular,2013-12-16,22:30,2013-12-17T03:30:00+00:00,30,{'average': 9},{'medium': 'https://static.tvmaze.com/uploads/...,<p>It's around Christmas time and Jerry's pare...,{'self': {'href': 'https://api.tvmaze.com/epis...
3,14311,https://www.tvmaze.com/episodes/14311/rick-and...,M. Night Shaym-Aliens!,1,4,regular,2014-01-13,22:30,2014-01-14T03:30:00+00:00,30,{'average': 9},{'medium': 'https://static.tvmaze.com/uploads/...,<p>Rick and Morty try to get to the bottom of ...,{'self': {'href': 'https://api.tvmaze.com/epis...
4,14312,https://www.tvmaze.com/episodes/14312/rick-and...,Meeseeks and Destroy,1,5,regular,2014-01-20,22:30,2014-01-21T03:30:00+00:00,30,{'average': 9},{'medium': 'https://static.tvmaze.com/uploads/...,<p>Rick provides the family with a solution to...,{'self': {'href': 'https://api.tvmaze.com/epis...


In [10]:
len(df)

51

In [11]:
df.groupby("season").size()

season
1    11
2    10
3    10
4    10
5    10
dtype: int64

In [12]:
df["rating"].str["average"].describe()

count    51.000000
mean      8.876471
std       0.465441
min       7.500000
25%       8.600000
50%       9.000000
75%       9.200000
max       9.800000
Name: rating, dtype: float64

Extra trick with Python `collections`:

In [13]:
from collections import Counter

In [14]:
summaries = df["summary"].str.replace("<p>", "").str.replace("</p>", "").tolist()

word_counts = Counter(" ".join(summaries).split())

word_counts.most_common(15)

[('a', 42),
 ('to', 37),
 ('Rick', 34),
 ('and', 34),
 ('Morty', 28),
 ('the', 25),
 ('in', 22),
 ('of', 21),
 ('broh.', 14),
 ('on', 13),
 ('with', 12),
 ('Jerry', 10),
 ('his', 9),
 ('an', 8),
 ('their', 8)]

In [15]:
clean_word_counts = Counter({
    word: count
    for word, count in word_counts.items()
    if word not in ENGLISH_STOP_WORDS
})

clean_word_counts.most_common(15)

[('Rick', 34),
 ('Morty', 28),
 ('broh.', 14),
 ('Jerry', 10),
 ('new', 7),
 ('gets', 5),
 ('family', 5),
 ('Beth', 5),
 ('takes', 4),
 ("It's", 4),
 ('The', 4),
 ("Morty's", 3),
 ('time', 3),
 ('Rick.', 3),
 ('adventure', 3)]