## Category Description:
- count: number of entries
- mean: average of entries
- std: standard deviation
- min: minimum entry
- 25%: first quartile (25th percentile)
- 50%: second quartile (the median)
- 75%: third quartile (75th percentile)
- max: maximum entry

In [1]:
import pandas as pd

In [2]:
# Read the Pokemon CSV into a DataFrame; the location is kept in a named
# variable so it is easy to spot and change.
pokemon_csv_path = '/home/fabian/ds_club/data/pokemon.csv'
data = pd.read_csv(pokemon_csv_path)

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
numeric_summary = data.describe()
numeric_summary

Unnamed: 0,#,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,435.1025,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,119.96304,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,180.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,330.0,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,450.0,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,515.0,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,780.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


## Indexing Pandas Time Series

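- Dates stored as text are plain strings (dtype `object`) until they are converted to datetime values
- `parse_dates` (boolean, a `pd.read_csv` option): parse dates into datetime values, shown in ISO 8601 format (yyyy-mm-dd hh:mm:ss)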

In [6]:
# Dates written as text are ordinary Python strings until they are converted.
time_list = [
    "1992-03-08",
    "1992-04-12",
]
second_entry = time_list[1]
print(time_list)
print(type(second_entry))

['1992-03-08', '1992-04-12']
<class 'str'>


In [8]:
# pd.to_datetime on a list of date strings returns a DatetimeIndex, not a list.
datetime_object = pd.to_datetime(time_list)
converted_type = type(datetime_object)
print(converted_type)

<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [10]:
import warnings
warnings.filterwarnings("ignore")

In [12]:
# Take an independent copy of the first five rows. Assigning a new column to
# the frame returned by head() directly is the classic SettingWithCopyWarning
# trigger; copying first makes the ownership of data2 explicit and leaves
# `data` untouched.
data2 = data.head().copy()

# One date per row (head() returns five rows, so five dates are required).
date_list = ["1992-01-10", "1992-02-10", "1992-03-10", "1993-03-15", "1993-03-16"]

# Convert the strings to datetime values and attach them as a "date" column.
datetime_object = pd.to_datetime(date_list)
data2["date"] = datetime_object

In [13]:
# Promote the "date" column to the index so rows can be selected by date.
# Guarded so that re-running this cell is a no-op: after the first run "date"
# is the index rather than a column, and an unguarded set_index("date") would
# raise KeyError.
if data2.index.name != "date":
    data2 = data2.set_index("date")
data2

Unnamed: 0_level_0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1992-01-10,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1992-02-10,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
1992-03-10,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
1993-03-15,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
1993-03-16,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [14]:
# Exact-date lookup on the datetime index: returns the matching row.
single_day = data2.loc["1993-03-16"]
print(single_day)

# Date-range slice on the datetime index: both end dates are included.
date_span = data2.loc["1992-03-10":"1993-03-16"]
print(date_span)

#                      4
Name          Charmander
Type 1              Fire
Type 2               NaN
Total                309
HP                    39
Attack                52
Defense               43
Sp. Atk               60
Sp. Def               50
Speed                 65
Generation             1
Legendary          False
Name: 1993-03-16 00:00:00, dtype: object
            #                   Name Type 1  Type 2  Total  HP  Attack  \
date                                                                     
1992-03-10  3               Venusaur  Grass  Poison    525  80      82   
1993-03-15  3  VenusaurMega Venusaur  Grass  Poison    625  80     100   
1993-03-16  4             Charmander   Fire     NaN    309  39      52   

            Defense  Sp. Atk  Sp. Def  Speed  Generation  Legendary  
date                                                                 
1992-03-10       83      100      100     80           1      False  
1993-03-15      123      122      120     80         

## Resampling Pandas Time Series

### Resampling: 
Apply a statistical method (e.g. mean) over different time intervals
- Need to specify the frequency (e.g. "M" = month, "A" = year)

### Downsampling:
Reduce datetime rows to a lower frequency (e.g. daily --> weekly)

### Upsampling:
Increase datetime rows to a higher frequency (e.g. daily --> hourly)

### Interpolate:
Interpolate (보충하다, i.e. fill in) missing values using a method such as 'linear', 'time' or 'index'

In [16]:
# Downsample to yearly bins ("A" = year end) and average each bin.
# numeric_only=True is required because data2 also holds text columns
# (Name, Type 1, Type 2) that cannot be averaged; pandas 2.0+ raises a
# TypeError on them instead of silently dropping them.
data2.resample("A").mean(numeric_only=True)

Unnamed: 0_level_0,#,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1992-12-31,2.0,416.0,61.666667,64.333333,65.0,81.666667,81.666667,61.666667,1.0,False
1993-12-31,3.5,467.0,59.5,76.0,83.0,91.0,85.0,72.5,1.0,False


In [17]:
# Resample to monthly bins ("M" = month end) and average each bin; months
# without any rows come back as NaN.
# numeric_only=True is required because data2 also holds text columns
# (Name, Type 1, Type 2) that cannot be averaged; pandas 2.0+ raises a
# TypeError on them instead of silently dropping them.
data2.resample("M").mean(numeric_only=True)

Unnamed: 0_level_0,#,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1992-01-31,1.0,318.0,45.0,49.0,49.0,65.0,65.0,45.0,1.0,0.0
1992-02-29,2.0,405.0,60.0,62.0,63.0,80.0,80.0,60.0,1.0,0.0
1992-03-31,3.0,525.0,80.0,82.0,83.0,100.0,100.0,80.0,1.0,0.0
1992-04-30,,,,,,,,,,
1992-05-31,,,,,,,,,,
1992-06-30,,,,,,,,,,
1992-07-31,,,,,,,,,,
1992-08-31,,,,,,,,,,
1992-09-30,,,,,,,,,,
1992-10-31,,,,,,,,,,


In [18]:
# Take the first row of every monthly bin, then fill the empty months by
# linear interpolation between the surrounding known values.
monthly_first = data2.resample("M").first()
monthly_first.interpolate(method="linear")

Unnamed: 0_level_0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1992-01-31,1.0,Bulbasaur,Grass,Poison,318.0,45.0,49.0,49.0,65.0,65.0,45.0,1.0,0.0
1992-02-29,2.0,Ivysaur,Grass,Poison,405.0,60.0,62.0,63.0,80.0,80.0,60.0,1.0,0.0
1992-03-31,3.0,Venusaur,Grass,Poison,525.0,80.0,82.0,83.0,100.0,100.0,80.0,1.0,0.0
1992-04-30,3.0,,,,533.333333,80.0,83.5,86.333333,101.833333,101.666667,80.0,1.0,0.0
1992-05-31,3.0,,,,541.666667,80.0,85.0,89.666667,103.666667,103.333333,80.0,1.0,0.0
1992-06-30,3.0,,,,550.0,80.0,86.5,93.0,105.5,105.0,80.0,1.0,0.0
1992-07-31,3.0,,,,558.333333,80.0,88.0,96.333333,107.333333,106.666667,80.0,1.0,0.0
1992-08-31,3.0,,,,566.666667,80.0,89.5,99.666667,109.166667,108.333333,80.0,1.0,0.0
1992-09-30,3.0,,,,575.0,80.0,91.0,103.0,111.0,110.0,80.0,1.0,0.0
1992-10-31,3.0,,,,583.333333,80.0,92.5,106.333333,112.833333,111.666667,80.0,1.0,0.0
