# Pandas

## Intro to Pandas

In [2]:
import pandas as pd
import numpy as np

In [2]:
# A list of strings passed to pandas becomes a one-dimensional array (a Series).
# When no index (key) is supplied, pandas generates a default numbered index.
names = ["Papa", "Mama", "Jessica"]
series1 = pd.Series(data=names)
series1

0       Papa
1       Mama
2    Jessica
dtype: object

In [3]:
# In a Series of strings, a None entry is kept as-is rather than being converted.
names = ["Papa", "Mama", "Jessica", None]
series1 = pd.Series(data=names)
series1

0       Papa
1       Mama
2    Jessica
3       None
dtype: object

In [4]:
# A list of numbers passed to pandas also becomes a one-dimensional array (a Series),
# again with an automatically generated numbered index when none is supplied.
# Unlike with strings, a None entry is converted to NaN (Not a Number). NaN plays the
# same "missing value" role as None, but it is NOT equal to None.
numbers = [1, 2, 5, 6, None]
series2 = pd.Series(data=numbers)
series2

0    1.0
1    2.0
2    5.0
3    6.0
4    NaN
dtype: float64

In [6]:
# NaN is not None: comparing np.nan to None with == evaluates to False.
np.nan == None

False

In [7]:
# NaN should be detected with a dedicated function such as np.isnan rather than with ==.
np.isnan(np.nan)

True

In [8]:
# A dictionary can be used to build a Series together with its index:
# the keys become the index labels and the values become the data.
sobrinos = {
    "Valentina": "Matematicas",
    "Diego": "Idiomas",
    "Alejandro": "Religion",
}
series3 = pd.Series(data=sobrinos)
series3

Valentina    Matematicas
Diego            Idiomas
Alejandro       Religion
dtype: object

In [3]:
# Index labels can also be supplied explicitly through the constructor's index= argument.
clases = ["Matematica", "Idiomas", "Religion"]
series4 = pd.Series(
    data=clases,
    index=["Valentina", "Diego", "Alejandro"],
)
series4

Valentina    Matematica
Diego           Idiomas
Alejandro      Religion
dtype: object

In [12]:
# When a Series is built from a dictionary, the keys normally become the index,
# but an explicit index can be passed as well.
# Any requested label that is NOT a key of the dictionary gets NaN as its value,
# and dictionary keys that are not listed in the index are left out.
sobrinos = {
    "Valentina": "Matematicas",
    "Diego": "Idiomas",
    "Alejandro": "Religion",
}
series5 = pd.Series(
    data=sobrinos,
    index=["Valentina", "Diego", "Clara"],
)
series5

Valentina    Matematicas
Diego            Idiomas
Clara                NaN
dtype: object

## Querying Series

In [13]:
# The index attribute returns the index (the labels) of the Series.

series5.index

Index(['Valentina', 'Diego', 'Clara'], dtype='object')

In [16]:
# A Series can be queried by index label or by integer position, using .loc and .iloc.
# .loc and .iloc are indexer attributes, not methods: they are subscripted with
# square brackets [] rather than called with parentheses ().
# To look up a value by its index label, use .loc.
series5.loc["Valentina"]

'Matematicas'

In [19]:
# To look up a value by its integer position (0-based), use .iloc.
series5.iloc[1]

'Idiomas'

In [22]:
# .loc can also add an element: assigning a value to a label that does not exist yet
# appends it to the Series, much like setting a new key on a dictionary.
# Note: this modifies series5 in place.
series5.loc["Tia"] = "Yoga"
series5

Valentina    Matematicas
Diego            Idiomas
Clara                NaN
Tia                 Yoga
dtype: object

## Operations in Series

In [23]:
# The sum of a Series can be computed with an explicit Python loop,
# but this approach can be inefficient and slow.
values = [100, 50, 70]
ser = pd.Series(data=values)

summ = 0
for value in ser:
    summ = summ + value
summ

220

In [25]:
# A much more efficient approach is to use numpy's optimized functions, for example np.sum.

summ = np.sum(ser)
summ

220

## Manipulating Series

In [26]:
# A Series' values can be modified by applying an operation to the Series itself;
# the operation is applied to every element. This is called broadcasting.
# Current values, before any modification:
ser

0    100
1     50
2     70
dtype: int64

In [27]:
# Adding a scalar to the Series adds it to every element.
# Note: += updates ser, so re-running this cell adds 2 again.
ser +=2
ser

0    102
1     52
2     72
dtype: int64

In [28]:
# The same update can be written as an explicit loop: items() yields (index, value)
# pairs, much like dict.items(), and the .at accessor sets a single value by label.
# Series.iteritems() was deprecated in pandas 1.5 and removed in pandas 2.0;
# Series.items() is the supported replacement and behaves the same way.

for index, value in ser.items():
    ser.at[index] = value + 2
ser

0    104
1     54
2     74
dtype: int64

In [29]:
# A Series may use the same index label for several values.
valores = [1, 5, 9]
series6 = pd.Series(
    data=valores,
    index=["Vale", "Vale", "Vale"],
)
series6

Vale    1
Vale    5
Vale    9
dtype: int64

In [31]:
# Two Series can be concatenated with pd.concat. Unlike list.append, this does NOT
# happen in place: a new Series is returned and must be assigned to a name.
# (Series.append() was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement.)
series7 = pd.concat([series5, series6])
series7

Valentina    Matematicas
Diego            Idiomas
Clara                NaN
Tia                 Yoga
Vale                   1
Vale                   5
Vale                   9
dtype: object

In [32]:
# If an index label is repeated, looking it up with .loc returns a Series
# containing every matching element rather than a single value.
series7.loc["Vale"]

Vale    1
Vale    5
Vale    9
dtype: object

In [34]:
# The Series returned for a repeated label can itself be indexed by position with .iloc.
series7.loc["Vale"].iloc[1]

5