# Pandas!

In [20]:
import pandas as pd
import numpy as np

In [21]:
# No index is supplied, so pandas assigns the default positional one (0..4).
S_1 = pd.Series(data=[50, 2050, 900, 96, 11])

In [22]:
# Label the Series; the name is shown in the footer of its repr.
S_1.name = 'Gibberish'

In [23]:
S_1

0      50
1    2050
2     900
3      96
4      11
Name: Gibberish, dtype: int64

In [24]:
type(S_1)

pandas.core.series.Series

In [25]:
type(S_1.values)

numpy.ndarray

In [26]:
# Everything except the last element, selected explicitly by position.
S_1.iloc[:-1]

0      50
1    2050
2     900
3      96
Name: Gibberish, dtype: int64

In [27]:
S_1.index

RangeIndex(start=0, stop=5, step=1)

We can replace the index of a Series with any labels we want, as long as we supply exactly one label per element.

In [28]:
# Swap the default RangeIndex for string labels (one label per element).
S_1.index = pd.Index(['Item1', 'Item2', 'Item3', 'Item4', 'Item5'])

In [29]:
S_1

Item1      50
Item2    2050
Item3     900
Item4      96
Item5      11
Name: Gibberish, dtype: int64

In [30]:
pd.Series(np.arange(5), index=['Item1', 'Item2', 'Item3', 'Item4', 'Item5'])

Item1    0
Item2    1
Item3    2
Item4    3
Item5    4
dtype: int64

Alternatively, a Series can be built directly from a dictionary: the keys become the index labels and the values become the data.

In [52]:
# Build the Series from a dict: keys become the index labels, values the data.
pop = pd.Series(
    data={'Rome': 2.873, 'Albuquerque': 0.561, 'Socorro': 0.008, 'Shanghai': 26.32},
    name="City population in millions",
)

In [32]:
pop

Rome            2.873
Albuquerque     0.561
Socorro         0.008
Shanghai       26.320
Name: City population in millions, dtype: float64

The index labels we created are now how we access the data.

In [33]:
pop.name

'City population in millions'

In [34]:
pop['Rome']

2.873

In [35]:
pop.iloc[0]

2.873

In [36]:
# Integer key on a string-labelled Series: the lookup falls back to position
# (same value as pop.iloc[0]) but pandas emits a warning while doing so.
# NOTE(review): this fallback appears to be deprecated in recent pandas
# releases -- confirm, and prefer pop.iloc[0] outside of this demonstration.
pop[0]

  pop[0]


2.873

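Positional (integer) indices can still be used through the `.iloc` indexer.
Using plain bracket notation with an integer (`pop[0]`) on a label-indexed Series still returns the value but produces a warning, so prefer `.iloc` for positional access.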

In [37]:
# Label-based slice through the explicit .loc indexer.
pop.loc['Rome':'Shanghai']

Rome            2.873
Albuquerque     0.561
Socorro         0.008
Shanghai       26.320
Name: City population in millions, dtype: float64

Label-based slices include both endpoints, unlike positional slices, which exclude the stop position.

# Operations and Boolean series

In [38]:
pop * 500000

Rome            1436500.0
Albuquerque      280500.0
Socorro            4000.0
Shanghai       13160000.0
Name: City population in millions, dtype: float64

In [39]:
pop

Rome            2.873
Albuquerque     0.561
Socorro         0.008
Shanghai       26.320
Name: City population in millions, dtype: float64

In [40]:
pop <= 2

Rome           False
Albuquerque     True
Socorro         True
Shanghai       False
Name: City population in millions, dtype: bool

In [41]:
pop[pop>1]

Rome         2.873
Shanghai    26.320
Name: City population in millions, dtype: float64

In [42]:
pop[pop==2]

Series([], Name: City population in millions, dtype: float64)

This returns an empty Series because no value is exactly equal to 2.

In [43]:
pop.mean()

7.4405

In [44]:
pop.std()

12.647360106625678

In [45]:
np.log(pop)

Rome           1.055357
Albuquerque   -0.578034
Socorro       -4.828314
Shanghai       3.270329
Name: City population in millions, dtype: float64

In [46]:
# Overwrite a single entry in place, addressed by its label.
pop.loc['Socorro'] = 500

In [47]:
pop

Rome             2.873
Albuquerque      0.561
Socorro        500.000
Shanghai        26.320
Name: City population in millions, dtype: float64

In [48]:
# Boolean-mask assignment: every entry below 50 is overwritten in place.
pop.loc[pop < 50] = 5000

In [49]:
pop

Rome           5000.0
Albuquerque    5000.0
Socorro         500.0
Shanghai       5000.0
Name: City population in millions, dtype: float64

# Other stuff

In [50]:
from datetime import date

# `today` is evaluated at run time, so the range below (and its displayed
# output) depends on the day this cell is executed.
today = date.today()
# Passing start, end and periods together yields 50 evenly spaced timestamps.
pd.date_range('2024-3-28', today, periods=50)

DatetimeIndex([          '2024-03-28 00:00:00',
               '2024-03-28 10:17:08.571428571',
               '2024-03-28 20:34:17.142857142',
               '2024-03-29 06:51:25.714285714',
               '2024-03-29 17:08:34.285714285',
               '2024-03-30 03:25:42.857142857',
               '2024-03-30 13:42:51.428571428',
                         '2024-03-31 00:00:00',
               '2024-03-31 10:17:08.571428571',
               '2024-03-31 20:34:17.142857142',
               '2024-04-01 06:51:25.714285714',
               '2024-04-01 17:08:34.285714285',
               '2024-04-02 03:25:42.857142857',
               '2024-04-02 13:42:51.428571428',
                         '2024-04-03 00:00:00',
               '2024-04-03 10:17:08.571428571',
               '2024-04-03 20:34:17.142857142',
               '2024-04-04 06:51:25.714285714',
               '2024-04-04 17:08:34.285714285',
               '2024-04-05 03:25:42.857142857',
               '2024-04-05 13:42:51.4285

https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html