# Pandas part 2

### Read the content of the `data_export.csv` file saved at the end of the previous exercise into the dataframe so that the `Unnamed 0` column is not created when the file is read.

In [1]:
import pandas as pd
testdata_2 = pd.read_csv('data_export.csv', index_col = 0)
testdata_2

Unnamed: 0,column1,column2,column3
0,,0.380058,0.919389
1,0.922251,0.490721,0.133932
3,0.363707,0.878349,0.4857
6,0.323134,0.036466,0.186775
7,0.46761,0.850993,0.377558
8,0.303299,0.096971,0.451031
9,0.733485,0.591996,0.658924
10,0.641628,0.251886,0.274987
12,0.319906,0.4892,0.990319
13,0.780267,0.458454,0.053428


### Drop the rows with `NaN` values.

In [2]:
testdata_2 = testdata_2.dropna()
testdata_2

Unnamed: 0,column1,column2,column3
1,0.922251,0.490721,0.133932
3,0.363707,0.878349,0.4857
6,0.323134,0.036466,0.186775
7,0.46761,0.850993,0.377558
8,0.303299,0.096971,0.451031
9,0.733485,0.591996,0.658924
10,0.641628,0.251886,0.274987
12,0.319906,0.4892,0.990319
13,0.780267,0.458454,0.053428
15,0.935875,0.698276,0.446885


### Change all values ​​in the fifth and tenth row to `NaN`.

In [3]:
import numpy as np
testdata_2 = testdata_2.dropna().copy()
testdata_2.iloc[[4, 9], :] = np.nan
testdata_2

Unnamed: 0,column1,column2,column3
1,0.922251,0.490721,0.133932
3,0.363707,0.878349,0.4857
6,0.323134,0.036466,0.186775
7,0.46761,0.850993,0.377558
8,,,
9,0.733485,0.591996,0.658924
10,0.641628,0.251886,0.274987
12,0.319906,0.4892,0.990319
13,0.780267,0.458454,0.053428
15,,,


### Print the number of `NaN` values ​​in each column from the DataFrame object.

In [4]:
nan_count = testdata_2.isna().sum()
print(nan_count)

column1    2
column2    2
column3    2
dtype: int64


### Fill in the `NaN` values ​​made above with linear interpolation.

In [5]:
testdata_2 = testdata_2.interpolate(method='linear')
testdata_2

Unnamed: 0,column1,column2,column3
1,0.922251,0.490721,0.133932
3,0.363707,0.878349,0.4857
6,0.323134,0.036466,0.186775
7,0.46761,0.850993,0.377558
8,0.600548,0.721495,0.518241
9,0.733485,0.591996,0.658924
10,0.641628,0.251886,0.274987
12,0.319906,0.4892,0.990319
13,0.780267,0.458454,0.053428
15,0.52673,0.379381,0.410849


### Change the last value of `column1` to 94 and the last value of `column3` to 45.

In [6]:
testdata_2.loc[testdata_2.index[-1], 'column1'] = 94
testdata_2.loc[testdata_2.index[-1], 'column3'] = 45
testdata_2

Unnamed: 0,column1,column2,column3
1,0.922251,0.490721,0.133932
3,0.363707,0.878349,0.4857
6,0.323134,0.036466,0.186775
7,0.46761,0.850993,0.377558
8,0.600548,0.721495,0.518241
9,0.733485,0.591996,0.658924
10,0.641628,0.251886,0.274987
12,0.319906,0.4892,0.990319
13,0.780267,0.458454,0.053428
15,0.52673,0.379381,0.410849


### Add a new column called `column4` to the DataFrame object. This column should include the sum of the columns `column1` and `column3`.

In [7]:
testdata_2['column4'] = testdata_2['column1'] + testdata_2['column3']
testdata_2

Unnamed: 0,column1,column2,column3,column4
1,0.922251,0.490721,0.133932,1.056182
3,0.363707,0.878349,0.4857,0.849407
6,0.323134,0.036466,0.186775,0.509909
7,0.46761,0.850993,0.377558,0.845168
8,0.600548,0.721495,0.518241,1.118788
9,0.733485,0.591996,0.658924,1.392409
10,0.641628,0.251886,0.274987,0.916615
12,0.319906,0.4892,0.990319,1.310225
13,0.780267,0.458454,0.053428,0.833695
15,0.52673,0.379381,0.410849,0.937579


### Reset the index of the DataFrame object and print the number of rows.

In [8]:
testdata_2.reset_index(drop=True, inplace=True)
print(len(testdata_2))

17


### Make time series data with the length of a DataFrame object in the time interval you want and make it an index column.

In [9]:
start_date = '2025-01-01'
periods = 15
frequency ='D'
date_range = pd.date_range(start=start_date, periods=periods, freq=frequency)

data = np.random.randint(0, 100, size=(periods))
timedata = pd.DataFrame(data, columns=['Events'])

timedata['Date'] = date_range
timedata.set_index('Date', inplace=True)

print(timedata)


            Events
Date              
2025-01-01      79
2025-01-02      58
2025-01-03      81
2025-01-04      25
2025-01-05      26
2025-01-06      26
2025-01-07      11
2025-01-08      44
2025-01-09      53
2025-01-10      88
2025-01-11      39
2025-01-12      75
2025-01-13      88
2025-01-14      31
2025-01-15       4


### Convert all values ​​in `column1` to strings with each zero replaced by an uppercase `ND` character combination.

In [10]:
testdata_2['column1'] = testdata_2['column1'].astype(str).str.replace('0', 'ND')
print(testdata_2)

            column1   column2    column3     column4
0     ND.92225ND592  0.490721   0.133932    1.056182
1     ND.3637ND6572  0.878349   0.485700    0.849407
2      ND.323133868  0.036466   0.186775    0.509909
3     ND.4676ND9956  0.850993   0.377558    0.845168
4   ND.6NDND5476355  0.721495   0.518241    1.118788
5      ND.733485315  0.591996   0.658924    1.392409
6      ND.641628229  0.251886   0.274987    0.916615
7     ND.3199ND5543  0.489200   0.990319    1.310225
8     ND.78ND267224  0.458454   0.053428    0.833695
9   ND.52673NDND625  0.379381   0.410849    0.937579
10    ND.2731929ND1  0.300307   0.768270    1.041463
11   ND.467ND6NDND4  0.225161   0.911570    1.378630
12   ND.3NDND588879  0.455980   0.292225    0.592814
13    ND.8599699ND1  0.346539   0.364629    1.224599
14     ND.842364415  0.093485   0.662066    1.504430
15     ND.781693324  0.573344   0.815692    1.597386
16            94.ND  0.945625  45.000000  139.000000


### Make a copy of the DataFrame object, whose column names have been changed to the following: `column5`, `column6`, `column7`, `column8`. After that, make one big DataFrame object, which consists of the original DataFrame and a copy so that they are added next to each other.


| column1 | column2 | column3 | column4 | column5 | column6 | column7 | column8 |
| ------------- |:-------------:| -----:| ------------- |:-------------:| -----:|-----:|-----:|
| 0.1 | 0.2 | 0.3 | ... | ... | ... | ... | ... |

In [11]:
testdata_2_copy = testdata_2.copy()
testdata_2_copy.columns = ['column5', 'column6', 'column7', 'column8']

combined = pd.concat([testdata_2, testdata_2_copy], axis=1).reset_index(drop=True)
combined

Unnamed: 0,column1,column2,column3,column4,column5,column6,column7,column8
0,ND.92225ND592,0.490721,0.133932,1.056182,ND.92225ND592,0.490721,0.133932,1.056182
1,ND.3637ND6572,0.878349,0.4857,0.849407,ND.3637ND6572,0.878349,0.4857,0.849407
2,ND.323133868,0.036466,0.186775,0.509909,ND.323133868,0.036466,0.186775,0.509909
3,ND.4676ND9956,0.850993,0.377558,0.845168,ND.4676ND9956,0.850993,0.377558,0.845168
4,ND.6NDND5476355,0.721495,0.518241,1.118788,ND.6NDND5476355,0.721495,0.518241,1.118788
5,ND.733485315,0.591996,0.658924,1.392409,ND.733485315,0.591996,0.658924,1.392409
6,ND.641628229,0.251886,0.274987,0.916615,ND.641628229,0.251886,0.274987,0.916615
7,ND.3199ND5543,0.4892,0.990319,1.310225,ND.3199ND5543,0.4892,0.990319,1.310225
8,ND.78ND267224,0.458454,0.053428,0.833695,ND.78ND267224,0.458454,0.053428,0.833695
9,ND.52673NDND625,0.379381,0.410849,0.937579,ND.52673NDND625,0.379381,0.410849,0.937579


### Put the last value of `column5` into a variable and the last value of `column3` into a separate variable from the newly made DataFrame object in string format. Concatenate those variables as a string and remove the last two characters from the result. Finally, remove all dots `.`.

In [12]:
last_in_column5 = str(combined['column5'].iloc[-1])
last_in_column3 = str(combined['column3'].iloc[-1])
result = (last_in_column5 + last_in_column3)
result = result[:-2]
result = result.replace(".", "")
result

'94ND45'