In [1]:
import pandas as pd
import numpy as np
import requests

## read the JSON file that you saved in ex02

In [2]:
df = pd.read_json('data/auto.json', orient='records')

## define the format of it in pandas using pd.options.display.float_format: floats should be displayed with two decimals

In [4]:
pd.options.display.float_format = '{:,.2f}'.format

In [6]:
rng = np.random.RandomState(21)
sample_index = [rng.randint(725) for i in range(200)]
sample_data = {'CarNumber': [df['CarNumber'][i] for i in sample_index]}
sample_data.update({'Refund': [rng.choice(df['Refund']) for i in range(200)]})
sample_data.update({'Fines': [rng.choice(df['Fines']) for i in range(200)]})
sample_data.update({'Make': [df['Make'][i] for i in sample_index]})
sample_data.update({'Model': [df['Model'][i] for i in sample_index]})
df_sample = pd.DataFrame(sample_data)

## concatenate the sample with the initial dataframe to a new dataframe con- cat_rows

In [8]:
concat_rows = pd.concat([df, df_sample])

## create a series with the name Year using random integers from 1980 to 2019. Use np.random.seed(21) before generating the years

In [14]:
np.random.seed(21)
years = [np.random.randint(1980, 2019) for i in range(925)]
df_years = pd.DataFrame(years)

## concatenate the series with the dataframe and name it fines

In [16]:
fines = concat_rows.copy()
fines['Year'] = df_years

In [33]:
unique_car_numbers = len(fines["CarNumber"].unique())
unique_car_numbers

531

## get the most popular surnames in the US


In [55]:
surnames = pd.read_json('data/surname.json', orient='records')
surnames = surnames[1:]
surnames = surnames[0]

## create a new series with the surnames (they should not have special char- acters like commas, brackets, etc.) from the data you gathered, the count should be equal to the number of unique car numbers using the sample (use random_state = 21) 

In [46]:
np.random.seed(21)
owners = pd.DataFrame(
    {'CarNumber': fines['CarNumber'],'SURNAME': [np.random.choice(surnames) for i in range(925)]})

## append 5 more observations to the fines dataframe (come up with your own ideas of CarNumber, etc.)

In [53]:
data = {'CarNumber': ['M942OT152RUS', 'Y163O8161RUS', '92918M178RUS', '7184TT36RUS', 'C410X938RUS']}
data.update({'Refund': [np.random.choice(fines['Refund']) for i in range(5)]})
data.update({'Fines': [np.random.choice(fines['Fines']) for i in range(5)]})
data.update({'Make': [np.random.choice(fines['Make']) for i in range(5)]})
data.update({'Model': [np.random.choice(fines['Model']) for i in range(5)]})
data.update({'Year': [np.random.choice(fines['Year']) for i in range(5)]})
fines = pd.concat([fines, pd.DataFrame(data)])


## delete the dataframe last 20 observations from the owners and add 3 new observations (they are not the same as those you add to the fines dataframe)


In [57]:
owners = owners.drop(owners.tail(20).index)

Unnamed: 0,CarNumber,SURNAME
0,Y163O8161RUS,RICHARDSON
1,E432XX77RUS,ROSS
2,7184TT36RUS,MORGAN
3,X582HE161RUS,BAILEY
4,92918M178RUS,LOPEZ
...,...,...
155,H982HY161RUS,BROOKS
156,M741T9152RUS,JOHNSON
157,8611T8154RUS,JACKSON
158,Y232E877RUS,JOHNSON


## join both dataframes

In [58]:
fines.join(owners.set_index('CarNumber'), on='CarNumber', how='inner')

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,RICHARDSON
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,ROBERTS
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,BENNETT
720,Y163O8161RUS,2,1600.00,Ford,Focus,1999,RICHARDSON
720,Y163O8161RUS,2,1600.00,Ford,Focus,1999,ROBERTS
...,...,...,...,...,...,...,...
715,O136HO197RUS,2,7800.00,Toyota,Corolla,1989,ROBINSON
719,O22097197RUS,1,24300.00,Ford,Focus,1983,ROGERS
721,M0309X197RUS,1,22300.00,Ford,Focus,2018,GOMEZ
722,O673E8197RUS,2,600.00,Ford,Focus,2005,CARTER


In [59]:
fines.join(owners.set_index('CarNumber'), on='CarNumber', how='outer')

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,RICHARDSON
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,ROBERTS
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,BENNETT
720,Y163O8161RUS,2,1600.00,Ford,Focus,1999,RICHARDSON
720,Y163O8161RUS,2,1600.00,Ford,Focus,1999,ROBERTS
...,...,...,...,...,...,...,...
715,O136HO197RUS,2,7800.00,Toyota,Corolla,1989,ROBINSON
719,O22097197RUS,1,24300.00,Ford,Focus,1983,ROGERS
721,M0309X197RUS,1,22300.00,Ford,Focus,2018,GOMEZ
722,O673E8197RUS,2,600.00,Ford,Focus,2005,CARTER


In [60]:
fines.join(owners.set_index('CarNumber'), on='CarNumber', how='left')

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,RICHARDSON
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,ROBERTS
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,BENNETT
1,E432XX77RUS,1,6500.00,Toyota,Camry,1995,ROSS
1,E432XX77RUS,1,6500.00,Toyota,Camry,1995,HUGHES
...,...,...,...,...,...,...,...
2,92918M178RUS,1,800.00,Ford,Passat,1989,FOSTER
2,92918M178RUS,1,800.00,Ford,Passat,1989,NGUYEN
3,7184TT36RUS,1,24000.00,Ford,Golf,1983,MORGAN
4,C410X938RUS,2,7500.00,Ford,Focus,1990,THOMPSON


In [61]:
fines.join(owners.set_index('CarNumber'), on='CarNumber', how='right')

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989,RICHARDSON
720,Y163O8161RUS,2,1600.00,Ford,Focus,1999,RICHARDSON
137,Y163O8161RUS,2,3000.00,Ford,Focus,2005,RICHARDSON
181,Y163O8161RUS,2,8594.59,Ford,Focus,2002,RICHARDSON
188,Y163O8161RUS,2,3300.00,Ford,Focus,1990,RICHARDSON
...,...,...,...,...,...,...,...
291,M0269X197RUS,2,8594.59,Ford,Focus,2004,PATEL
428,M0269X197RUS,2,20600.00,Ford,Focus,1985,PATEL
11,M0269X197RUS,1,18500.00,Ford,Focus,1992,PATEL
32,M0269X197RUS,1,4400.00,Ford,Focus,1980,PATEL


## create a pivot table from the fines dataframe, it should look like this (the values are the sums of the fines), but with all the years

In [62]:
pd.pivot_table(fines, index=['Make', 'Model'], columns=['Year'], values='Fines', aggfunc='sum')

Unnamed: 0_level_0,Year,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
Make,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Ford,Focus,65300.0,379483.76,87700.0,163994.59,138294.59,365589.17,107394.59,95189.17,45978.35,173789.17,...,132494.59,118900.0,102994.59,150789.17,246389.17,72983.76,364300.0,144789.17,100794.59,86694.59
Ford,Golf,,,,24000.0,,,,,,,...,,,,,,,,,,
Ford,Mondeo,,,46200.0,,,,,,,,...,,,,,41100.0,,,,8600.0,
Ford,Passat,,,,,,,,180000.0,,800.0,...,,,,,,,,,,
Skoda,Octavia,11194.59,12900.0,8894.59,,300.0,8594.59,,2000.0,5100.0,11094.59,...,,2500.0,10500.0,1700.0,11900.0,6700.0,26789.17,35700.0,2400.0,153200.0
Toyota,Camry,12000.0,,,,1000.0,,19800.0,,,800.0,...,,22400.0,,7500.0,10600.0,,,,2200.0,12000.0
Toyota,Corolla,,40600.0,,12800.0,,6800.0,,54300.0,,7800.0,...,8594.59,6000.0,,,,,,,,
Volkswagen,Focus,,,,,,,,,,,...,,,,,,,,,,
Volkswagen,Golf,20800.0,8594.59,5000.0,200.0,,168000.0,,,,300.0,...,,,,,4200.0,13900.0,,,,
Volkswagen,Jetta,,1000.0,,,,9000.0,,,46000.0,,...,,,,,,,,,,


In [63]:
fines.to_csv('data/fines.csv', index=False)
owners.to_csv('data/owners.csv', index=False)