In [50]:
import pandas as pd
import numpy as np
import requests

rand = 21  # Установить на 42, для схождения значений в чеклисте
n = 200

## read the JSON file that you saved in `ex02`

In [51]:
df = pd.read_json(
    '../ex02/auto.json',
    orient='records',
)
pd.set_option(
    'display.float_format',
    '{:.2f}'.format
)

df

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.00,Ford,Focus
1,E432XX77RUS,1,6500.00,Toyota,Camry
2,7184TT36RUS,1,2100.00,Ford,Focus
3,X582HE161RUS,2,2000.00,Ford,Focus
4,92918M178RUS,1,5700.00,Ford,Focus
...,...,...,...,...,...
720,Y163O8161RUS,2,1600.00,Ford,Focus
721,M0309X197RUS,1,22300.00,Ford,Focus
722,O673E8197RUS,2,600.00,Ford,Focus
723,8610T8154RUS,1,2000.00,Ford,Focus


## enrich the dataframe using a sample from that dataframe

### create a sample with 200 new observations with `random_state = 21` 

In [52]:
sample = df.sample(n=n, random_state=rand)
sample['Fines'] = np.random.choice(df['Fines'], size=n)
sample['Refund'] = np.random.choice(df['Refund'], size=n)

sample

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
326,H305T8197RUS,2,1000.00,Ford,Focus
518,C3829X154RUS,2,22300.00,Ford,Focus
554,Y7689C197RUS,1,1800.00,Ford,Focus
33,Y7719C197RUS,1,8594.59,Ford,Focus
375,C583EY154RUS,1,5800.00,Ford,Focus
...,...,...,...,...,...
478,8201XX154RUS,2,500.00,Ford,Focus
235,M581CH197RUS,1,8500.00,Ford,Focus
625,Y358O8197RUS,2,1000.00,Ford,Focus
507,M580CH197RUS,2,8500.00,Ford,Focus


### concatenate the sample with the initial dataframe to a new dataframe `concat_rows`

In [53]:
concat_rows = pd.concat(
    [
        df,
        sample
    ],
    ignore_index=True
)

concat_rows

Unnamed: 0,CarNumber,Refund,Fines,Make,Model
0,Y163O8161RUS,2,3200.00,Ford,Focus
1,E432XX77RUS,1,6500.00,Toyota,Camry
2,7184TT36RUS,1,2100.00,Ford,Focus
3,X582HE161RUS,2,2000.00,Ford,Focus
4,92918M178RUS,1,5700.00,Ford,Focus
...,...,...,...,...,...
920,8201XX154RUS,2,500.00,Ford,Focus
921,M581CH197RUS,1,8500.00,Ford,Focus
922,Y358O8197RUS,2,1000.00,Ford,Focus
923,M580CH197RUS,2,8500.00,Ford,Focus


## enrich the dataframe `concat_rows` by a new column with the data generated 

### create a series with the name `Year` using random integers from `1980` to `2019`

In [54]:
np.random.seed(rand)
year = pd.Series(
    np.random.randint(
        1980,
        2019,
        concat_rows.shape[0],
    ),
    name='Year'
)

year

0      2018
1      2008
2      1994
3      1987
4      2000
       ... 
920    2016
921    2006
922    2012
923    1983
924    2001
Name: Year, Length: 925, dtype: int64

### concatenate the series with the dataframe and name it `fines`

In [55]:
fines = concat_rows
fines['Year'] = year
fines

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Y163O8161RUS,2,3200.00,Ford,Focus,2018
1,E432XX77RUS,1,6500.00,Toyota,Camry,2008
2,7184TT36RUS,1,2100.00,Ford,Focus,1994
3,X582HE161RUS,2,2000.00,Ford,Focus,1987
4,92918M178RUS,1,5700.00,Ford,Focus,2000
...,...,...,...,...,...,...
920,8201XX154RUS,2,500.00,Ford,Focus,2016
921,M581CH197RUS,1,8500.00,Ford,Focus,2006
922,Y358O8197RUS,2,1000.00,Ford,Focus,2012
923,M580CH197RUS,2,8500.00,Ford,Focus,1983


## enrich the dataframe with the data from another dataframe

#### create a new series with the surnames

In [56]:
surnames = pd.read_json('../../datasets/surname.json', orient='columns')
surnames.columns = surnames.iloc[0]
surnames.drop(
    0,
    inplace=True
)
surnames = surnames['NAME'].sample(
    n=fines['CarNumber'].unique().shape[0],
    random_state=rand,
    replace=True
)
surnames

52       MENDOZA
93          WARD
15       COLLINS
72          REED
61        NELSON
         ...    
69         PRICE
100        YOUNG
34     HERNANDEZ
52       MENDOZA
95         WHITE
Name: NAME, Length: 531, dtype: object

#### create the dataframe owners with 2 columns: `CarNumber` and `SURNAME`

In [57]:
owners = pd.DataFrame(
    data={
        'CarNumber': fines['CarNumber'].unique(),
        'SURNAME': surnames.values
    },
).reset_index(drop=True)
owners

Unnamed: 0,CarNumber,SURNAME
0,Y163O8161RUS,MENDOZA
1,E432XX77RUS,WARD
2,7184TT36RUS,COLLINS
3,X582HE161RUS,REED
4,92918M178RUS,NELSON
...,...,...
526,O136HO197RUS,PRICE
527,O22097197RUS,YOUNG
528,M0309X197RUS,HERNANDEZ
529,O673E8197RUS,MENDOZA


### append 5 more observations to the fines dataframe (come up with your own ideas of CarNumber, etc.)

In [58]:
new_observ = pd.DataFrame(
    [
        ['a121345raaa', 2.0, 12345.0, 'Harry', 'Potter', 1000],
        ['33XJudas', 3.14, 1337.0, 'Jesus', 'Christ', 0],
        ['BigChungus42', 69.0, 420.0, 'Shrek', 'Swamp', 2000],
        ['Poland', 4.20, 123.0, 'BOBR', 'Kurva', 2007],
        ['DogeCoinToTheMoon', 1.618, 1000000.0, 'Elon', 'Tusk', 2023],
    ],
    columns=fines.columns
)
fines = pd.concat(
    [fines, new_observ],
    ignore_index=True
)
fines

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year
0,Y163O8161RUS,2.00,3200.00,Ford,Focus,2018
1,E432XX77RUS,1.00,6500.00,Toyota,Camry,2008
2,7184TT36RUS,1.00,2100.00,Ford,Focus,1994
3,X582HE161RUS,2.00,2000.00,Ford,Focus,1987
4,92918M178RUS,1.00,5700.00,Ford,Focus,2000
...,...,...,...,...,...,...
925,a121345raaa,2.00,12345.00,Harry,Potter,1000
926,33XJudas,3.14,1337.00,Jesus,Christ,0
927,BigChungus42,69.00,420.00,Shrek,Swamp,2000
928,Poland,4.20,123.00,BOBR,Kurva,2007


### delete the dataframe last `20` observations from the owners and add 3 new observations (they are not the same as those you add to the `fines` dataframe)

In [59]:
owners.drop(
    owners.tail(20).index,
    inplace=True
)
new_owners = pd.DataFrame(
    [
        ['asdfg', 'PERRY'],
        ['123456y8789', 'PLATYPUS'],
        ['122112e1213213', 'fufelshmerz'],
    ],
    columns=owners.columns
)
owners = pd.concat(
    [owners, new_owners],
    ignore_index=True
)
owners

Unnamed: 0,CarNumber,SURNAME
0,Y163O8161RUS,MENDOZA
1,E432XX77RUS,WARD
2,7184TT36RUS,COLLINS
3,X582HE161RUS,REED
4,92918M178RUS,NELSON
...,...,...
509,O50197197RUS,PRICE
510,7608EE777RUS,LEWIS
511,asdfg,PERRY
512,123456y8789,PLATYPUS


### join both dataframes:

#### the new dataframe should have only the car numbers that exist in both dataframes

In [60]:
fines.merge(
    owners,
    how='inner',
)

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2.00,3200.00,Ford,Focus,2018,MENDOZA
1,E432XX77RUS,1.00,6500.00,Toyota,Camry,2008,WARD
2,7184TT36RUS,1.00,2100.00,Ford,Focus,1994,COLLINS
3,X582HE161RUS,2.00,2000.00,Ford,Focus,1987,REED
4,92918M178RUS,1.00,5700.00,Ford,Focus,2000,NELSON
...,...,...,...,...,...,...,...
895,8201XX154RUS,2.00,500.00,Ford,Focus,2016,WALKER
896,M581CH197RUS,1.00,8500.00,Ford,Focus,2006,HALL
897,Y358O8197RUS,2.00,1000.00,Ford,Focus,2012,MORRIS
898,M580CH197RUS,2.00,8500.00,Ford,Focus,1983,ALLEN


#### the new dataframe should have **all** the car numbers that exist in **both** dataframes

In [61]:
fines.merge(
    owners,
    how='outer',
)

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,122112e1213213,,,,,,fufelshmerz
1,123456y8789,,,,,,PLATYPUS
2,33XJudas,3.14,1337.00,Jesus,Christ,0.00,
3,704687163RUS,2.00,1400.00,Ford,Focus,1980.00,NGUYEN
4,704787163RUS,2.00,2800.00,Ford,Focus,1982.00,ALVAREZ
...,...,...,...,...,...,...,...
928,Y973O8197RUS,2.00,8594.59,Ford,Focus,2014.00,BENNETT
929,Y973O8197RUS,1.00,34800.00,Ford,Focus,1981.00,BENNETT
930,Y973O8197RUS,1.00,69600.00,Ford,Focus,2008.00,BENNETT
931,a121345raaa,2.00,12345.00,Harry,Potter,1000.00,


#### the new dataframe should have only the car numbers from the `fines` dataframe 

In [62]:
fines.merge(
    owners,
    how='left',
)

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2.00,3200.00,Ford,Focus,2018,MENDOZA
1,E432XX77RUS,1.00,6500.00,Toyota,Camry,2008,WARD
2,7184TT36RUS,1.00,2100.00,Ford,Focus,1994,COLLINS
3,X582HE161RUS,2.00,2000.00,Ford,Focus,1987,REED
4,92918M178RUS,1.00,5700.00,Ford,Focus,2000,NELSON
...,...,...,...,...,...,...,...
925,a121345raaa,2.00,12345.00,Harry,Potter,1000,
926,33XJudas,3.14,1337.00,Jesus,Christ,0,
927,BigChungus42,69.00,420.00,Shrek,Swamp,2000,
928,Poland,4.20,123.00,BOBR,Kurva,2007,


#### the new dataframe should have only the car numbers from the `owners` dataframe

In [63]:
fines.merge(
    owners,
    how='right',
)

Unnamed: 0,CarNumber,Refund,Fines,Make,Model,Year,SURNAME
0,Y163O8161RUS,2.00,3200.00,Ford,Focus,2018.00,MENDOZA
1,Y163O8161RUS,2.00,1600.00,Ford,Focus,1997.00,MENDOZA
2,Y163O8161RUS,1.00,800.00,Ford,Focus,1990.00,MENDOZA
3,E432XX77RUS,1.00,6500.00,Toyota,Camry,2008.00,WARD
4,E432XX77RUS,2.00,13000.00,Toyota,Camry,2004.00,WARD
...,...,...,...,...,...,...,...
898,O50197197RUS,2.00,7800.00,Ford,Focus,2009.00,PRICE
899,7608EE777RUS,1.00,4000.00,Skoda,Octavia,1996.00,LEWIS
900,asdfg,,,,,,PERRY
901,123456y8789,,,,,,PLATYPUS


## create a pivot table from the `fines` dataframe, it should look like this (the values are the sums of the fines), but with all the years (the values may be different for you):

In [64]:
pivot = fines.pivot_table(
    values='Fines',
    index=['Make', 'Model'],
    columns='Year',
)
pivot

Unnamed: 0_level_0,Year,0,1000,1980,1981,1982,1983,1984,1985,1986,1987,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2023
Make,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
BOBR,Kurva,,,,,,,,,,,...,,,,,,,,,,
Elon,Tusk,,,,,,,,,,,...,,,,,,,,,,1000000.0
Ford,Focus,,,9986.49,6290.42,5639.28,5368.41,10068.0,10710.24,8215.38,5017.39,...,7980.91,12287.92,5585.52,6958.51,7127.35,6694.72,16863.78,8536.48,10112.81,
Ford,Mondeo,,,,,,,,,,,...,,34400.0,,,,8600.0,,,,
Harry,Potter,,12345.0,,,,,,,,,...,,,,,,,,,,
Jesus,Christ,1337.0,,,,,,,,,,...,,,,,,,,,,
Shrek,Swamp,,,,,,,,,,,...,,,,,,,,,,
Skoda,Octavia,,,1600.0,9300.0,53150.0,54400.0,,1050.0,8500.0,,...,,1000.0,8000.0,,8594.59,,3700.0,,2825.0,
Toyota,Camry,,,8594.59,,,,,,,,...,7500.0,,22400.0,,8594.59,,,7200.0,19800.0,
Toyota,Corolla,,,34300.0,7600.0,3400.0,,3200.0,8594.59,,500.0,...,,11400.0,,3100.0,5650.0,,4747.29,24000.0,,


## save both the `fines` and `owners` dataframes to CSV files without an index

In [65]:
fines.to_csv(
    'fines.csv',
    sep=',',
    index=False,
)

owners.to_csv(
    'owners.csv',
    sep=',',
    index=False,
)