# Pandas data structures

In [1]:
import pandas as pd
import numpy as np

# Series

In [3]:
# Build a Series from a plain Python list. No index is given, so pandas
# assigns the default integer index 0..n-1.
s = pd.Series(data=[1, 23, 4])
s

0     1
1    23
2     4
dtype: int64

In [4]:
# A Series has two parts: the index (row labels) and the underlying values.
# Print each one separately to inspect them.
print(s.index)
print(s.values)

RangeIndex(start=0, stop=3, step=1)
[ 1 23  4]


In [9]:
# A Series can carry an explicit index. The labels do not have to share a
# type: here a string, several integers and a boolean are mixed.
# NOTE(review): in Python True == 1, so the labels True and 1 may collide on
# lookup -- fine for a demo, but avoid mixing bool and int labels in real code.
s2 = pd.Series(
    data=[1, 23, 4, 5, 1, 1, 8],
    index=["a", 4, True, 1, 2, 3, 7],
)
s2

a        1
4       23
True     4
1        5
2        1
3        1
7        8
dtype: int64

In [14]:
# Select a single element by its index label.
s2["a"]

1

In [13]:
# Comparison is applied element-wise and yields a boolean Series
# that keeps the original index.
s2>3

a       False
4        True
True     True
1        True
2       False
3       False
7        True
dtype: bool

In [15]:
# Arithmetic with a scalar is applied to every element; the index is preserved.
s2 * 2

a        2
4       46
True     8
1       10
2        2
3        2
7       16
dtype: int64

In [16]:
# NumPy functions such as np.sqrt accept a Series directly and return a
# Series with the same index (the result here is float64).
np.sqrt(s2)

a       1.000000
4       4.795832
True    2.000000
1       2.236068
2       1.000000
3       1.000000
7       2.828427
dtype: float64

In [17]:
"a" in s2 #Label search

True

In [25]:
# Create a Series from a dictionary: the keys become the index labels and
# the dictionary values become the data.
dicData = {
    "a": 1,
    "2": 2,
    "c": 4,
}
s3 = pd.Series(data=dicData)
s3

a    1
2    2
c    4
dtype: int64

In [22]:
# Value search: to test membership among the data rather than the labels,
# check against the underlying values array.
2 in s3.values

True

In [27]:
# Adding two Series aligns them on their index labels; any label that is
# missing from either operand produces NaN in the result. Note that the
# string label "2" (in s3) and the integer label 2 (in s2) are different
# labels, so only "a" is matched here.
s3+s2

True    NaN
1       NaN
2       NaN
3       NaN
4       NaN
7       NaN
2       NaN
a       2.0
c       NaN
dtype: float64

# Data Frames

In [47]:
# Build a DataFrame from a dictionary of equal-length lists: each key becomes
# a column name and each list supplies that column's values.
dfData = {
    "province": ["Madrid", "Santander", "Burgos"],
    "population": [4500000, 200000, 30000],
    "year": [1950, 1966, 1988],
}
df = pd.DataFrame(data=dfData)
df

Unnamed: 0,province,population,year
0,Madrid,4500000,1950
1,Santander,200000,1966
2,Burgos,30000,1988


In [49]:
# `columns` selects and orders the columns taken from the data. A requested
# name that is not a key of the dictionary ("debt") still becomes a column,
# filled with missing values.
df2 = pd.DataFrame(dfData , columns=["province", "population", "year", "debt"])
df2

Unnamed: 0,province,population,year,debt
0,Madrid,4500000,1950,
1,Santander,200000,1966,
2,Burgos,30000,1988,


In [50]:
# Select one column by name with bracket indexing; the result is a Series.
df2["population"]

0    4500000
1     200000
2      30000
Name: population, dtype: int64

In [51]:
# Attribute-style access returns the same column as df2["population"].
# It only works when the column name is a valid Python identifier.
df2.population

0    4500000
1     200000
2      30000
Name: population, dtype: int64

In [52]:
# Create a new column: assigning a scalar to a column name that does not
# exist yet adds the column and fills every row with that value.
# Note that this modifies df2 in place.
df2["newColumn"] = 0
df2

Unnamed: 0,province,population,year,debt,newColumn
0,Madrid,4500000,1950,,0
1,Santander,200000,1966,,0
2,Burgos,30000,1988,,0


In [53]:
# .loc selects rows by index label (not by position); a single label
# returns that row as a Series whose index is the column names.
df2.loc[1]

province      Santander
population       200000
year               1966
debt                NaN
newColumn             0
Name: 1, dtype: object

In [54]:
# .T transposes the frame: the columns become rows and the rows become columns.
df2.T

Unnamed: 0,0,1,2
province,Madrid,Santander,Burgos
population,4500000,200000,30000
year,1950,1966,1988
debt,,,
newColumn,0,0,0


In [55]:
# After transposing, the former column names are the row labels, so
# .loc["year"] returns what used to be the "year" column
# (note the object dtype of the result).
df2.T.loc["year"]

0    1950
1    1966
2    1988
Name: year, dtype: object

In [57]:
# Summary statistics (count, mean, std, min, quartiles, max), transposed so
# that each described column is shown as one row. Only population, year and
# newColumn appear in the summary.
df2.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
population,3.0,1576667.0,2533107.0,30000.0,115000.0,200000.0,2350000.0,4500000.0
year,3.0,1968.0,19.07878,1950.0,1958.0,1966.0,1977.0,1988.0
newColumn,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
# .iloc selects by integer position: take every row from position 1 onward,
# then transpose the result for display.
df2.iloc[1:].T

Unnamed: 0,1,2
province,Santander,Burgos
population,200000,30000
year,1966,1988
debt,,
newColumn,0,0


# Dropping

In [83]:
# Series holding the values 0..4 under an explicit integer index (labels 5..9).
# The index is passed by keyword as a plain list literal; wrapping a list
# literal in list() is redundant.
s4 = pd.Series(np.arange(5), index=[5, 6, 7, 8, 9])
s4

5    0
6    1
7    2
8    3
9    4
dtype: int64

In [84]:
# drop returns a new Series without the given label(s). s4 itself is left
# unchanged; it would only be modified if inplace=True were passed.
s5 = s4.drop([5])
s5

6    1
7    2
8    3
9    4
dtype: int64

In [85]:
# Display s4 again to confirm it still contains label 5 after the drop.
s4

5    0
6    1
7    2
8    3
9    4
dtype: int64

In [88]:
# Transposed copy of df2, stored under a new name so that df2 itself
# is left unchanged.
df3 = df2.copy().T
df3

Unnamed: 0,0,1,2
province,Madrid,Santander,Burgos
population,4500000,200000,30000
year,1950,1966,1988
debt,,,
newColumn,0,0,0
