In [31]:
import pandas as pd
import numpy as np
import requests

## read the JSON file

In [32]:
df = pd.read_json(
    "../data/auto.json"
)
pd.options.display.float_format = '{:,.2f}'.format
df

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.00,Ford,Focus
1,E432XX77RUS,1,6500.00,Toyota,Camry
2,7184TT36RUS,1,2100.00,Ford,Focus
3,X582HE161RUS,2,2000.00,Ford,Focus
4,92918M178RUS,1,5700.00,Ford,Focus
...,...,...,...,...,...
720,Y163O8161RUS,2,1600.00,Ford,Focus
721,M0309X197RUS,1,22300.00,Ford,Focus
722,O673E8197RUS,2,600.00,Ford,Focus
723,8610T8154RUS,1,2000.00,Ford,Focus


## enrich the dataframe

In [33]:
sample = df.sample(200, random_state=21)
sample


Unnamed: 0,CarNumber,Refund,Fines,Make,Model
445,M0299X197RUS,2,19200.00,Ford,Focus
22,83298C154RUS,2,8594.59,Ford,Focus
93,H957HY161RUS,1,2000.00,Ford,Focus
173,T941CC96RUS,1,2000.00,Ford,Focus
697,H966HY161RUS,1,500.00,Ford,Focus
...,...,...,...,...,...
14,8182XX154RUS,1,200.00,Ford,Focus
623,X796TH96RUS,1,500.00,Ford,Focus
498,T011MY163RUS,2,4000.00,Ford,Focus
536,T341CC96RUS,2,1000.00,Volkswagen,Passat


In [34]:
sample['Fines'] = df['Fines'].sample(200, random_state=42).values
sample['Refund'] = df['Refund'].sample(200, random_state=42).values
sample

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
445,M0299X197RUS,1,28100.00,Ford,Focus
22,83298C154RUS,2,13500.00,Ford,Focus
93,H957HY161RUS,2,45000.00,Ford,Focus
173,T941CC96RUS,1,9200.00,Ford,Focus
697,H966HY161RUS,1,1000.00,Ford,Focus
...,...,...,...,...,...
14,8182XX154RUS,1,7800.00,Ford,Focus
623,X796TH96RUS,1,200.00,Ford,Focus
498,T011MY163RUS,2,21800.00,Ford,Focus
536,T341CC96RUS,2,9500.00,Volkswagen,Passat


In [35]:
concat_rows = pd.concat([df, sample])
concat_rows.reset_index(inplace=True, drop=True)
concat_rows

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.00,Ford,Focus
1,E432XX77RUS,1,6500.00,Toyota,Camry
2,7184TT36RUS,1,2100.00,Ford,Focus
3,X582HE161RUS,2,2000.00,Ford,Focus
4,92918M178RUS,1,5700.00,Ford,Focus
...,...,...,...,...,...
920,8182XX154RUS,1,7800.00,Ford,Focus
921,X796TH96RUS,1,200.00,Ford,Focus
922,T011MY163RUS,2,21800.00,Ford,Focus
923,T341CC96RUS,2,9500.00,Volkswagen,Passat


## enrich concat_rows

In [36]:
np.random.seed(21)
years = pd.Series(np.random.randint(1980, 2020, (len(concat_rows))), name="Years")
fines = pd.concat([concat_rows, years], axis=1)
fines

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Years
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989
1,E432XX77RUS,1,6500.00,Toyota,Camry,1995
2,7184TT36RUS,1,2100.00,Ford,Focus,1984
3,X582HE161RUS,2,2000.00,Ford,Focus,2015
4,92918M178RUS,1,5700.00,Ford,Focus,2014
...,...,...,...,...,...,...
920,8182XX154RUS,1,7800.00,Ford,Focus,1981
921,X796TH96RUS,1,200.00,Ford,Focus,1992
922,T011MY163RUS,2,21800.00,Ford,Focus,2007
923,T341CC96RUS,2,9500.00,Volkswagen,Passat,2005


## enrich with another dataframe

In [37]:
names = pd.read_json("../data/surname.json")
num_unique = len(fines.CarNumber.unique())
new_header = names.iloc[0]
names = names[1:]
names.columns = new_header
owners = pd.concat([fines.CarNumber.sample(100).reset_index(drop=True), names.NAME.reset_index(drop=True)], axis=1)
owners

Unnamed: 0,CarNumber,NAME
0,9935TE152RUS,ADAMS
1,H115YO163RUS,ALLEN
2,704987163RUS,ALVAREZ
3,X522OM161RUS,ANDERSON
4,O481OH77RUS,BAILEY
...,...,...
95,T395OT197RUS,WILLIAMS
96,Y316E877RUS,WILSON
97,7840C8197RUS,WOOD
98,7844C8197RUS,WRIGHT


In [38]:
to_add = ["8976TT36US", 3, 7000.0, "Tesla", "Model Y", 2020]
fines.loc[len(fines)] = to_add
to_add = ["9000MAT9US", 1, 300.0, "Tesla", "Model X", 2021]
fines.loc[len(fines)] = to_add
to_add = ["KO6TT36US", 5, 57000.0, "Tesla", "Model M", 2022]
fines.loc[len(fines)] = to_add
to_add = ["KOTT9736US", 1, 100.0, "Tesla", "Model Z", 2019]
fines.loc[len(fines)] = to_add
to_add = ["POST36US", 1, 700.0, "Tesla", "Model S", 2018]
fines.loc[len(fines)] = to_add
fines

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Years
0,Y163O8161RUS,2,3200.00,Ford,Focus,1989
1,E432XX77RUS,1,6500.00,Toyota,Camry,1995
2,7184TT36RUS,1,2100.00,Ford,Focus,1984
3,X582HE161RUS,2,2000.00,Ford,Focus,2015
4,92918M178RUS,1,5700.00,Ford,Focus,2014
...,...,...,...,...,...,...
925,8976TT36US,3,7000.00,Tesla,Model Y,2020
926,9000MAT9US,1,300.00,Tesla,Model X,2021
927,KO6TT36US,5,57000.00,Tesla,Model M,2022
928,KOTT9736US,1,100.00,Tesla,Model Z,2019


In [39]:
to_drop = fines.CarNumber.iloc[-20:]
owners[~owners['CarNumber'].isin(to_drop)]
to_add = ["8976TT36EU", "IVANOV"]
owners.loc[len(owners)] = to_add
to_add = ["9000MAT9EU", "PETROV"]
owners.loc[len(owners)] = to_add
to_add = ["KO6TT36EU", "KIM"]
owners.loc[len(owners)] = to_add
owners

Unnamed: 0,CarNumber,NAME
0,9935TE152RUS,ADAMS
1,H115YO163RUS,ALLEN
2,704987163RUS,ALVAREZ
3,X522OM161RUS,ANDERSON
4,O481OH77RUS,BAILEY
...,...,...
98,7844C8197RUS,WRIGHT
99,8603T8154RUS,YOUNG
100,8976TT36EU,IVANOV
101,9000MAT9EU,PETROV


the new dataframe should have only the car numbers that exist in both
dataframes

In [40]:
res = pd.concat([fines[fines.CarNumber.isin(owners.CarNumber)], owners[owners.CarNumber.isin(fines.CarNumber)]]).reset_index(drop=True)
res

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Years,NAME
0,O21997197RUS,1.00,2000.00,Ford,Focus,1992.00,
1,E445TC197RUS,1.00,8594.59,Ford,Focus,1985.00,
2,8440XX154RUS,1.00,6200.00,Ford,Focus,1996.00,
3,83298C154RUS,2.00,8594.59,Ford,Focus,2013.00,
4,Y7659C197RUS,2.00,8594.59,Ford,Focus,1992.00,
...,...,...,...,...,...,...,...
298,T395OT197RUS,,,,,,WILLIAMS
299,Y316E877RUS,,,,,,WILSON
300,7840C8197RUS,,,,,,WOOD
301,7844C8197RUS,,,,,,WRIGHT


the new dataframe should have all the car numbers that exist in both dataframes

In [41]:
res = pd.concat([fines, owners]).reset_index(drop=True)
res

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Years,NAME
0,Y163O8161RUS,2.00,3200.00,Ford,Focus,1989.00,
1,E432XX77RUS,1.00,6500.00,Toyota,Camry,1995.00,
2,7184TT36RUS,1.00,2100.00,Ford,Focus,1984.00,
3,X582HE161RUS,2.00,2000.00,Ford,Focus,2015.00,
4,92918M178RUS,1.00,5700.00,Ford,Focus,2014.00,
...,...,...,...,...,...,...,...
1028,7844C8197RUS,,,,,,WRIGHT
1029,8603T8154RUS,,,,,,YOUNG
1030,8976TT36EU,,,,,,IVANOV
1031,9000MAT9EU,,,,,,PETROV


the new dataframe should have only the car numbers from the fines dataframe

In [42]:
res = pd.concat([fines, owners[owners.CarNumber.isin(fines.CarNumber)]]).reset_index(drop=True)
res

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Years,NAME
0,Y163O8161RUS,2.00,3200.00,Ford,Focus,1989.00,
1,E432XX77RUS,1.00,6500.00,Toyota,Camry,1995.00,
2,7184TT36RUS,1.00,2100.00,Ford,Focus,1984.00,
3,X582HE161RUS,2.00,2000.00,Ford,Focus,2015.00,
4,92918M178RUS,1.00,5700.00,Ford,Focus,2014.00,
...,...,...,...,...,...,...,...
1025,T395OT197RUS,,,,,,WILLIAMS
1026,Y316E877RUS,,,,,,WILSON
1027,7840C8197RUS,,,,,,WOOD
1028,7844C8197RUS,,,,,,WRIGHT


the new dataframe should have only the car numbers from the owners dataframe

In [43]:
res = pd.concat([fines[fines.CarNumber.isin(owners.CarNumber)], owners]).reset_index(drop=True)
res

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Years,NAME
0,O21997197RUS,1.00,2000.00,Ford,Focus,1992.00,
1,E445TC197RUS,1.00,8594.59,Ford,Focus,1985.00,
2,8440XX154RUS,1.00,6200.00,Ford,Focus,1996.00,
3,83298C154RUS,2.00,8594.59,Ford,Focus,2013.00,
4,Y7659C197RUS,2.00,8594.59,Ford,Focus,1992.00,
...,...,...,...,...,...,...,...
301,7844C8197RUS,,,,,,WRIGHT
302,8603T8154RUS,,,,,,YOUNG
303,8976TT36EU,,,,,,IVANOV
304,9000MAT9EU,,,,,,PETROV


## a pivot table

In [44]:
pd.pivot_table(fines, index=('Make', 'Model'), values='Fines', aggfunc=np.sum, columns='Years', fill_value='nan')

Unnamed: 0_level_0,Years,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
Make,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Audi,Audi,,,,,,,,,,2000.0,...,,,,,,,,,,
BMW,BMW,,,,,,,,,,,...,,,8594.59,,,6500.0,,,,
Ford,Focus,63789.17,398289.17,153078.35,108100.0,177594.59,150283.76,100294.59,126594.59,89494.59,70000.0,...,172683.76,122689.17,218594.59,191889.17,308789.17,284789.17,71994.59,,,
Ford,Mondeo,,,,,,,,,,8600.0,...,,,,46200.0,,,,,,
Skoda,Octavia,3900.0,,7500.0,11594.59,,10294.59,600.0,5200.0,2000.0,91400.0,...,12594.59,300.0,46394.59,300.0,3000.0,156200.0,9500.0,,,
Tesla,Model M,,,,,,,,,,,...,,,,,,,,,,57000.0
Tesla,Model S,,,,,,,,,,,...,,,,,,700.0,,,,
Tesla,Model X,,,,,,,,,,,...,,,,,,,,,300.0,
Tesla,Model Y,,,,,,,,,,,...,,,,,,,,7000.0,,
Tesla,Model Z,,,,,,,,,,,...,,,,,,,100.0,,,


## save both the fines and owners dataframes to CSV files without an index

In [45]:
fines.to_csv('fines.csv', index=False)
owners.to_csv('owners.csv', index=False)