# Programming and Data Analysis

> Assignment 5

Kuo, Yao-Jen <yaojenkuo@ntu.edu.tw> from [DATAINPOINT](https://www.datainpoint.com)

In [None]:
import unittest
import json
import requests
import numpy as np
import pandas as pd

## Instructions

- We've imported necessary modules at the top of each assignment.
- We've defined the names of functions/inputs/parameters for you.
- Save a copy in your own Google Drive before getting started.
    - File -> Save a copy in Drive.
- Write down your solution between the comments `### BEGIN SOLUTION` and `### END SOLUTION`.
- It is NECESSARY to `return` the answer; tests will fail if you only print it.
- DON'Ts
    - DON'T use `input()` function in your assignments.
    - DON'T use Safari browser to export Colab as Python script.
    - DON'T nest your solution in another `def` function code block.
    - DON'T install packages using `!pip install <package_name>`, since it is not valid Python syntax (although it is a valid bash command).
- Running tests to see if your solutions are right:
    - File -> Save.
    - Runtime -> Restart and run all.
- When you are ready to submit, click File -> Download -> Download `.py`.

![](https://i.imgur.com/Y1BcDdx.png)

- Open a new Colab in a private window, upload the script and run tests again before submission to make sure the script is executable in a fresh new Colab.

![](https://i.imgur.com/ojlvbds.png)

- Upload to the Assignment session on NTU COOL.

## In the following exercises, you will need some files to complete the functions. Run the cell below to download those files to your working directory.

In [None]:
import requests

# Data files required by the exercises below; each one is saved under the same
# name in the current working directory.
file_names = ["teams.json", "players.json", "movies.csv"]
for file_name in file_names:
    file_url = f"https://raw.githubusercontent.com/datainpoint/asgmts-programming-and-data-analysis-ntu-2022/main/{file_name}"
    # A timeout keeps the cell from hanging indefinitely on a stalled connection.
    r = requests.get(file_url, timeout=30)
    # Fail loudly on an HTTP error (e.g. 404) instead of silently saving the
    # error page to disk as if it were the data file.
    r.raise_for_status()
    with open(file_name, 'wb') as f:
        f.write(r.content)

## 01. Define a function `create_nba_teams()` which creates a DataFrame of NBA franchise teams in standard league given nested key `["league"]["standard"]` of `teams.json` in working directory.

```
        teamId tricode confName    divName           city  \
0   1610612737     ATL     East  Southeast        Atlanta   
1   1610612738     BOS     East   Atlantic         Boston   
2   1610612751     BKN     East   Atlantic       Brooklyn   
3   1610612766     CHA     East  Southeast      Charlotte   
4   1610612741     CHI     East    Central        Chicago   
5   1610612739     CLE     East    Central      Cleveland   
6   1610612742     DAL     West  Southwest         Dallas   
7   1610612743     DEN     West  Northwest         Denver   
8   1610612765     DET     East    Central        Detroit   
9   1610612744     GSW     West    Pacific   Golden State   
10  1610612745     HOU     West  Southwest        Houston   
11  1610612754     IND     East    Central        Indiana   
12  1610612746     LAC     West    Pacific             LA   
13  1610612747     LAL     West    Pacific    Los Angeles   
14  1610612763     MEM     West  Southwest        Memphis   
15  1610612748     MIA     East  Southeast          Miami   
16  1610612749     MIL     East    Central      Milwaukee   
17  1610612750     MIN     West  Northwest      Minnesota   
18  1610612740     NOP     West  Southwest    New Orleans   
19  1610612752     NYK     East   Atlantic       New York   
20  1610612760     OKC     West  Northwest  Oklahoma City   
21  1610612753     ORL     East  Southeast        Orlando   
22  1610612755     PHI     East   Atlantic   Philadelphia   
23  1610612756     PHX     West    Pacific        Phoenix   
24  1610612757     POR     West  Northwest       Portland   
25  1610612758     SAC     West    Pacific     Sacramento   
26  1610612759     SAS     West  Southwest    San Antonio   
27  1610612761     TOR     East   Atlantic        Toronto   
28  1610612762     UTA     West  Northwest           Utah   
29  1610612764     WAS     East  Southeast     Washington   

                  fullName  
0            Atlanta Hawks  
1           Boston Celtics  
2            Brooklyn Nets  
3        Charlotte Hornets  
4            Chicago Bulls  
5      Cleveland Cavaliers  
6         Dallas Mavericks  
7           Denver Nuggets  
8          Detroit Pistons  
9    Golden State Warriors  
10         Houston Rockets  
11          Indiana Pacers  
12             LA Clippers  
13      Los Angeles Lakers  
14       Memphis Grizzlies  
15              Miami Heat  
16         Milwaukee Bucks  
17  Minnesota Timberwolves  
18    New Orleans Pelicans  
19         New York Knicks  
20   Oklahoma City Thunder  
21           Orlando Magic  
22      Philadelphia 76ers  
23            Phoenix Suns  
24  Portland Trail Blazers  
25        Sacramento Kings  
26       San Antonio Spurs  
27         Toronto Raptors  
28               Utah Jazz  
29      Washington Wizards
```

In [None]:
def create_nba_teams() -> pd.core.frame.DataFrame:
    """
    Create a DataFrame of NBA franchise teams from `teams.json`.

    Per the assignment statement, the team records live under the nested key
    `["league"]["standard"]` of `teams.json` in the working directory, and the
    sample output shows the columns `teamId`, `tricode`, `confName`,
    `divName`, `city` and `fullName`.

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> nba_teams = create_nba_teams()
    >>> type(nba_teams)
    pandas.core.frame.DataFrame
    >>> nba_teams.shape
    (30, 6)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 02. Define a function `subset_nba_teams()` which subsets teams whose `tricode` is not the first 3 letters of their city in upper-cased form.

```
   tricode           city
2      BKN       Brooklyn
9      GSW   Golden State
12     LAC             LA
13     LAL    Los Angeles
18     NOP    New Orleans
19     NYK       New York
20     OKC  Oklahoma City
23     PHX        Phoenix
26     SAS    San Antonio
```

In [None]:
def subset_nba_teams() -> pd.core.frame.DataFrame:
    """
    Subset the NBA teams whose `tricode` is not the first 3 letters of the
    team's city in upper-cased form.

    The assignment's sample output keeps only the `tricode` and `city`
    columns (e.g. `BKN` / `Brooklyn`, `LAC` / `LA`).

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> nba_teams_subset = subset_nba_teams()
    >>> type(nba_teams_subset)
    pandas.core.frame.DataFrame
    >>> nba_teams_subset.shape
    (9, 2)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 03. Define a function `create_nba_players()` which creates a DataFrame of NBA franchise team's players in standard league given nested key `["league"]["standard"]` of `players.json` in working directory.

```
        team_id first_name  last_name        player_name
0    1610612761   Precious    Achiuwa   Precious Achiuwa
1    1610612763     Steven      Adams       Steven Adams
2    1610612748        Bam    Adebayo        Bam Adebayo
3    1610612762      Ochai     Agbaji       Ochai Agbaji
4    1610612763      Santi     Aldama       Santi Aldama
..          ...        ...        ...                ...
572  1610612737       Trae      Young         Trae Young
573  1610612748       Omer  Yurtseven     Omer Yurtseven
574  1610612762       Cody     Zeller        Cody Zeller
575  1610612759    Stephen  Zimmerman  Stephen Zimmerman
576  1610612746      Ivica      Zubac        Ivica Zubac

[577 rows x 4 columns]
```

In [None]:
def create_nba_players() -> pd.core.frame.DataFrame:
    """
    Create a DataFrame of NBA franchise teams' players from `players.json`.

    Per the assignment statement, the player records live under the nested
    key `["league"]["standard"]` of `players.json` in the working directory,
    and the sample output shows the columns `team_id`, `first_name`,
    `last_name` and `player_name`.

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> nba_players = create_nba_players()
    >>> type(nba_players)
    pandas.core.frame.DataFrame
    >>> nba_players.shape
    (577, 4)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 04. Define a function `subset_nba_players()` which subsets players whose `firstName` has exactly 5 letters.

```
        team_id         player_name
3    1610612762        Ochai Agbaji
4    1610612763        Santi Aldama
19   1610612762      Udoka Azubuike
24   1610612741          Lonzo Ball
26   1610612753      Paolo Banchero
..          ...                 ...
562  1610612763  Vince Williams Jr.
565  1610612739       Dylan Windler
567  1610612744       James Wiseman
569  1610612764        Delon Wright
576  1610612746         Ivica Zubac

[143 rows x 2 columns]
```

In [None]:
def subset_nba_players() -> pd.core.frame.DataFrame:
    """
    Subset the NBA players whose first name has exactly 5 letters.

    The assignment's sample output keeps only the `team_id` and
    `player_name` columns (e.g. `Ochai Agbaji`, `Lonzo Ball`).

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> nba_players_subset = subset_nba_players()
    >>> type(nba_players_subset)
    pandas.core.frame.DataFrame
    >>> nba_players_subset.shape
    (143, 2)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 05.  Define a function `create_nba_teams_and_players()` which generates a current player roster DataFrame for NBA franchise teams in standard league given `teams.json` and `players.json` in working directory.

```
              team_name         player_name
0         Atlanta Hawks   Bogdan Bogdanovic
1         Atlanta Hawks        Clint Capela
2         Atlanta Hawks        John Collins
3         Atlanta Hawks      Jarrett Culver
4         Atlanta Hawks       Trent Forrest
..                  ...                 ...
572  Washington Wizards  Kristaps Porzingis
573  Washington Wizards      Jordan Schakel
574  Washington Wizards         Craig Sword
575  Washington Wizards         Isaiah Todd
576  Washington Wizards        Delon Wright

[577 rows x 2 columns]
```

In [None]:
def create_nba_teams_and_players() -> pd.core.frame.DataFrame:
    """
    Build the current player roster of the NBA franchise teams.

    Per the assignment statement, the roster is generated from `teams.json`
    and `players.json` in the working directory; the sample output has one
    row per player with the columns `team_name` and `player_name`.

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> nba_teams_and_players = create_nba_teams_and_players()
    >>> type(nba_teams_and_players)
    pandas.core.frame.DataFrame
    >>> nba_teams_and_players.shape
    (577, 2)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 06.  Define a function `subset_nba_teams_and_players()` which finds out the current player roster of Boston Celtics and Golden State Warriors given `teams.json` and `players.json` in working directory.

```
                 team_name             player_name
16          Boston Celtics         Malcolm Brogdon
17          Boston Celtics            Jaylen Brown
18          Boston Celtics              JD Davison
19          Boston Celtics        Danilo Gallinari
20          Boston Celtics           Blake Griffin
21          Boston Celtics              Sam Hauser
22          Boston Celtics              Al Horford
23          Boston Celtics          Justin Jackson
24          Boston Celtics       Mfiondu Kabengele
25          Boston Celtics             Luke Kornet
26          Boston Celtics             Jake Layman
27          Boston Celtics        Payton Pritchard
28          Boston Celtics            Marcus Smart
29          Boston Celtics            Jayson Tatum
30          Boston Celtics             Noah Vonleh
31          Boston Celtics           Derrick White
32          Boston Celtics          Grant Williams
33          Boston Celtics     Robert Williams III
167  Golden State Warriors     Patrick Baldwin Jr.
168  Golden State Warriors           Stephen Curry
169  Golden State Warriors        Donte DiVincenzo
170  Golden State Warriors          Draymond Green
171  Golden State Warriors          JaMychal Green
172  Golden State Warriors          Andre Iguodala
173  Golden State Warriors               Ty Jerome
174  Golden State Warriors        Jonathan Kuminga
175  Golden State Warriors            Anthony Lamb
176  Golden State Warriors            Kevon Looney
177  Golden State Warriors             Moses Moody
178  Golden State Warriors            Jordan Poole
179  Golden State Warriors         Lester Quinones
180  Golden State Warriors         Jerome Robinson
181  Golden State Warriors            Ryan Rollins
182  Golden State Warriors             Pat Spencer
183  Golden State Warriors           Klay Thompson
184  Golden State Warriors  Quinndary Weatherspoon
185  Golden State Warriors          Andrew Wiggins
186  Golden State Warriors           James Wiseman
```

In [None]:
def subset_nba_teams_and_players() -> pd.core.frame.DataFrame:
    """
    Find the current player roster of the Boston Celtics and the Golden
    State Warriors.

    Per the assignment statement, the roster is derived from `teams.json`
    and `players.json` in the working directory; the sample output has the
    columns `team_name` and `player_name`.

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> nba_teams_and_players_subset = subset_nba_teams_and_players()
    >>> type(nba_teams_and_players_subset)
    pandas.core.frame.DataFrame
    >>> nba_teams_and_players_subset.shape
    (38, 2)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 07. Define a function `create_movies()` which creates a DataFrame of IMDb's top 250 rated movies of all time given `movies.csv` in working directory.

```
                        title              director  release_year  rating
0    The Shawshank Redemption        Frank Darabont          1994     9.2
1               The Godfather  Francis Ford Coppola          1972     9.2
2             The Dark Knight     Christopher Nolan          2008     9.0
3       The Godfather Part II  Francis Ford Coppola          1974     9.0
4                12 Angry Men          Sidney Lumet          1957     9.0
..                        ...                   ...           ...     ...
245               Dersu Uzala        Akira Kurosawa          1975     8.0
246                   Aladdin          Ron Clements          1992     8.0
247                  The Help           Tate Taylor          2011     8.0
248            The Iron Giant             Brad Bird          1999     8.0
249                    Gandhi  Richard Attenborough          1982     8.0

[250 rows x 4 columns]
```

In [None]:
def create_movies() -> pd.core.frame.DataFrame:
    """
    Create a DataFrame of IMDb's top 250 rated movies from `movies.csv`.

    `movies.csv` is expected in the working directory; the assignment's
    sample output shows the columns `title`, `director`, `release_year`
    and `rating`.

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> movies = create_movies()
    >>> type(movies)
    pandas.core.frame.DataFrame
    >>> movies.shape
    (250, 4)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 08. Define a function named `find_top_gun_maverick()` which finds out "Top Gun: Maverick" given `movies.csv` in working directory.

```
                title         director  release_year  rating
81  Top Gun: Maverick  Joseph Kosinski          2022     8.3
```

In [None]:
def find_top_gun_maverick() -> pd.core.frame.DataFrame:
    """
    Find the row for "Top Gun: Maverick" in `movies.csv`.

    `movies.csv` is expected in the working directory; the assignment's
    sample output is a single row with the columns `title`, `director`,
    `release_year` and `rating`.

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> top_gun_maverick = find_top_gun_maverick()
    >>> type(top_gun_maverick)
    pandas.core.frame.DataFrame
    >>> top_gun_maverick.shape
    (1, 4)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 09. Define a function named `find_starwars_episodes()` which finds out "Star Wars" episodes given `movies.csv` in working directory.

```
                                             title          director  \
14  Star Wars: Episode V - The Empire Strikes Back    Irvin Kershner   
27              Star Wars: Episode IV - A New Hope      George Lucas   
88      Star Wars: Episode VI - Return of the Jedi  Richard Marquand   

    release_year  rating  
14          1980     8.7  
27          1977     8.6  
88          1983     8.3
```

In [None]:
def find_starwars_episodes() -> pd.core.frame.DataFrame:
    """
    Find the "Star Wars" episodes in `movies.csv`.

    `movies.csv` is expected in the working directory; the assignment's
    sample output lists three movies whose titles begin with "Star Wars",
    with the columns `title`, `director`, `release_year` and `rating`.

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> starwars_episodes = find_starwars_episodes()
    >>> type(starwars_episodes)
    pandas.core.frame.DataFrame
    >>> starwars_episodes.shape
    (3, 4)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## 10. Define a function named `calculate_average_rating_each_year()` which computes the average rating by year given `movies.csv` in working directory.

```
    release_year    rating
0           1921  8.200000
1           1924  8.100000
2           1925  8.100000
3           1926  8.100000
4           1927  8.200000
..           ...       ...
81          2018  8.300000
82          2019  8.233333
83          2020  8.200000
84          2021  8.100000
85          2022  8.150000

[86 rows x 2 columns]
```

In [None]:
def calculate_average_rating_each_year() -> pd.core.frame.DataFrame:
    """
    Compute the average movie rating for each release year in `movies.csv`.

    `movies.csv` is expected in the working directory; the assignment's
    sample output has one row per year with the columns `release_year` and
    `rating`, on a default integer index.

    NOTE: this is an unimplemented template. Until a solution is written
    between the markers below, the function returns None.

    >>> average_rating_each_year = calculate_average_rating_each_year()
    >>> type(average_rating_each_year)
    pandas.core.frame.DataFrame
    >>> average_rating_each_year.shape
    (86, 2)
    """
    ### BEGIN SOLUTION
    
    ### END SOLUTION

## Running tests

The assignment session is finished. Click Runtime -> Restart and run all to run the following tests.

In [None]:
class TestAssignmentFive(unittest.TestCase):
    """Type, shape and cardinality checks for the ten assignment functions."""

    # NOTE: the test methods deliberately carry no docstrings. With
    # verbosity=2, unittest prints a docstring's first line next to the test
    # name, which would change the runner's output.

    def _check_frame(self, frame, expected_shape):
        # Shared assertion pair: the result must be a DataFrame of the
        # expected (rows, columns) shape. Returns the frame for further checks.
        self.assertIsInstance(frame, pd.DataFrame)
        self.assertEqual(frame.shape, expected_shape)
        return frame

    def test_01_create_nba_teams(self):
        teams = self._check_frame(create_nba_teams(), (30, 6))
        # 30 distinct tricodes, 2 conferences, 6 divisions.
        for column, distinct_count in (("tricode", 30), ("confName", 2), ("divName", 6)):
            self.assertEqual(teams[column].nunique(), distinct_count)

    def test_02_subset_nba_teams(self):
        self._check_frame(subset_nba_teams(), (9, 2))

    def test_03_create_nba_players(self):
        players = self._check_frame(create_nba_players(), (577, 4))
        self.assertEqual(players["team_id"].nunique(), 30)

    def test_04_subset_nba_players(self):
        self._check_frame(subset_nba_players(), (143, 2))

    def test_05_create_nba_teams_and_players(self):
        roster = self._check_frame(create_nba_teams_and_players(), (577, 2))
        self.assertEqual(roster["team_name"].nunique(), 30)

    def test_06_subset_nba_teams_and_players(self):
        roster_subset = self._check_frame(subset_nba_teams_and_players(), (38, 2))
        self.assertEqual(roster_subset["team_name"].nunique(), 2)

    def test_07_create_movies(self):
        self._check_frame(create_movies(), (250, 4))

    def test_08_find_top_gun_maverick(self):
        self._check_frame(find_top_gun_maverick(), (1, 4))

    def test_09_find_starwars_episodes(self):
        self._check_frame(find_starwars_episodes(), (3, 4))

    def test_10_calculate_average_rating_each_year(self):
        self._check_frame(calculate_average_rating_each_year(), (86, 2))

# Collect every test_* method of the assignment's TestCase and run them with
# per-test (verbose) reporting.
suite = unittest.TestLoader().loadTestsFromTestCase(TestAssignmentFive)
runner = unittest.TextTestRunner(verbosity=2)
test_results = runner.run(suite)

# Tally the outcome: a test that neither failed nor errored counts as a success.
number_of_test_runs = test_results.testsRun
number_of_failures, number_of_errors = (
    len(test_results.failures),
    len(test_results.errors),
)
number_of_successes = number_of_test_runs - number_of_failures - number_of_errors

In [None]:
# Report how many of the exercises passed their tests.
summary_template = "You've got {} successes among {} questions."
print(summary_template.format(number_of_successes, number_of_test_runs))