### Series

In [2]:
import pandas as pd
import numpy as np

In [5]:
# pandas series
# if no index is passed, a range(len(series)) will be used as index

pd.Series([1, 2, 3], index=["a", "g", "j"])

a    1
g    2
j    3
dtype: int64

In [6]:
# create series from list
pd.Series([1, 2, 3])

0    1
1    2
2    3
dtype: int64

In [7]:
# create series from NumPy array
pd.Series(np.array([1, 2, 3]))

0    1
1    2
2    3
dtype: int64

In [8]:
# create series from dictionary
# if no index is passed, the keys will become the index 

dict_to_series = {
    "A": 1,
    "B": 2,
    "C": 3
}

pd.Series(dict_to_series)

A    1
B    2
C    3
dtype: int64

In [15]:
# series can contain not only integers, but also strings, booleans, floats, ...
series = pd.Series(["string1", True, 1.5764442, 1000000])

series

0    string1
1       True
2    1.57644
3    1000000
dtype: object

In [54]:
# get a slice of a series by using brackets []

# first element
print(series[0])

# last two elements
print(series[1:2])

# everything but the last element
print(series[:-1])

string1
1    BLABLABLA
dtype: object
0      string1
1    BLABLABLA
2      1.57644
dtype: object


In [56]:
# you can also use loc for slicing
series.loc[1:]

1    BLABLABLA
2      1.57644
3      1000000
dtype: object

In [18]:
# you use iloc as well
series.iloc[2]

1.5764442

In [57]:
# filtering a series

ser = pd.Series(range(20))

ser[ser>15]

16    16
17    17
18    18
19    19
dtype: int64

In [19]:
#shape of a series: (number of elements, )

series.shape

(4,)

In [21]:
# by using shape we can get the dimensions of a series

length_of_series = series.shape[0]
length_of_series

4

In [23]:
# get the type of element

series.dtypes  # object-like style, because it contains a mix of types

dtype('O')

In [24]:
pd.Series([1, 2, 3]).dtype

dtype('int64')

In [25]:
# mutating series

series[1] = "BLABLABLA"

series

0      string1
1    BLABLABLA
2      1.57644
3      1000000
dtype: object

In [26]:
# get elements of a series:

series.get(0)  # using the index 

'string1'

### DataFrames

In [27]:
# creating a dataframe from dict of series

series1 = pd.Series(["a", "b", "c"], )
series2 = pd.Series(["f", "g", "h"], )

dict_series = {
    "variable1": series1,
    "variable2": series2
}

pd.DataFrame(dict_series)

Unnamed: 0,variable1,variable2
0,a,f
1,b,g
2,c,h


In [29]:
# creating a dataframe from dict of lists

dict_lists = {
    "var1": ["Good", "Average", "Bad"],
    "var2": [32, 6, 1],
    "var3": [False, True, False],
    "var4": [178, 60, 40]
}

pd.DataFrame(data)

Unnamed: 0,var1,var2,var3,var4
0,Good,32,False,178
1,Average,6,True,60
2,Bad,1,False,40


In [49]:
# creating a dataframe from a list of dicts
# the keys in each dictionary must be equal, and will be the columns
list_of_dicts = [
    {"Name": "Daniel", "Age": 32, "Furry": False, "Height": 178},
    {"Name": "Churro", "Age": 6, "Furry": True, "Height": 60},
    {"Name": "Plant", "Age": 1, "Furry": False, "Height": 40},
]

df = pd.DataFrame(list_of_dicts)
df

Unnamed: 0,Name,Age,Furry,Height
0,Daniel,32,False,178
1,Churro,6,True,60
2,Plant,1,False,40


In [31]:
# get the columns
df.columns

Index(['Name', 'Age', 'Furry', 'Height'], dtype='object')

In [32]:
# get the shape: (rows, columns)
df.shape

(3, 4)

In [27]:
# get a single column by its name: returns a pd.Series
df["Name"]

0    Daniel
1    Churro
2     Plant
Name: Name, dtype: object

In [33]:
# can also be done like this, not recommended
df.Name

0    Daniel
1    Churro
2     Plant
Name: Name, dtype: object

In [35]:
# slicing dataframe: iloc

# first two columns
df.iloc[:, :2]

Unnamed: 0,Name,Age
0,Daniel,32
1,Churro,6
2,Plant,1


In [36]:
# filtering

df[df["Furry"]==True]

Unnamed: 0,Name,Age,Furry,Height
1,Churro,6,True,60


In [37]:
#filtering
# can be passed several conditions and different logic operators
# & (and), | (or), ...
df[
    (df["Furry"]==False) &
    (df["Height"]<60)
]

Unnamed: 0,Name,Age,Furry,Height
2,Plant,1,False,40


In [50]:
# adding columns

df["is_cool"] = [True, True, False]  # from scratch
df["Height (m)"] = df["Height"] / 100  # performing operations on existing columns

# using assign method to create columns based on other columns
df = df.assign(age_in_months=lambda x: x["Age"] * 12)

# deleting columns: pop, drop
df.pop("Age")

df

Unnamed: 0,Name,Furry,Height,is_cool,Height (m),age_in_months
0,Daniel,False,178,True,1.78,384
1,Churro,True,60,True,0.6,72
2,Plant,False,40,False,0.4,12


In [40]:
# mutating values in df

df.loc[0, "Name"] = "Juan"
df

Unnamed: 0,Name,Furry,Height,is_cool,Height (m)
0,Juan,False,178,True,1.78
1,Churro,True,60,True,0.6
2,Plant,False,40,False,0.4


In [51]:
# types of data per column

df.dtypes

Name              object
Furry               bool
Height             int64
is_cool             bool
Height (m)       float64
age_in_months      int64
dtype: object

In [41]:
# super useful function when first facing a df

df.describe()

Unnamed: 0,Age,Height
count,3.0,3.0
mean,13.0,0.926667
std,16.643317,0.745743
min,1.0,0.4
25%,3.5,0.5
50%,6.0,0.6
75%,19.0,1.19
max,32.0,1.78


In [53]:
# transposing a df

df.T  # looks like a property, acts like a method *facepalms*

Unnamed: 0,0,1,2
Name,Daniel,Churro,Plant
Furry,False,True,False
Height,178,60,40
is_cool,True,True,False
Height (m),1.78,0.6,0.4
age_in_months,384,72,12
