In [None]:
# %% [markdown]
# # 5 - Pandas Basics
#
# ## 5.1 Pandas Structures
#
# ### Series
#
# A Series is a one-dimensional, array-like structure that holds values of a single dtype and
# has an associated array of labels, called its index.
#

In [None]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

In [None]:
# Build a Series from a plain list; no index is passed, so pandas assigns the default one.
obj = Series(data=[4, 2, 1, 7])
obj

In [None]:
# .array exposes the values stored in the Series, without the index
obj.array

In [None]:
# the index holds the labels of the Series; obj was built without one, so this is the default index
obj.index

### DataFrame

A DataFrame is a collection of columns, each holding some type of data, with an index both
for the rows and for the columns. It can be created from a dictionary, as shown below:

In [None]:
# Each key becomes a column label; each list holds that column's values, one entry per row.
data = {
    "state": ["Parana", "Goias", "Bahia", "Goias", "Parana"],
    "year": [1992, 2001, 2013, 2002, 2003],
    "pop": [1.3, 2.5, 3.1, 4.6, 3.0],
}
# No index is passed, so the rows get the default integer index.
frame = DataFrame(data=data)
frame

In [None]:
# head() shows the first rows of the frame (5 by default)
frame.head()

In [None]:
# tail() shows the last rows of the frame (5 by default)
frame.tail()

In [None]:
# the columns argument selects which columns to include and sets their order
pd.DataFrame(data, columns=["pop", "year"])

In [None]:
# Requesting a column label that is not a key of the dict ("debt") creates that column
# filled with missing values (NaN).
frame2 = pd.DataFrame(data, columns=["pop", "year", "debt"])
# Display the frame so the NaN-filled "debt" column is visible before it gets assigned later.
frame2

In [None]:
# columns can be accessed with dict-like indexing or as attributes; both return a Series
frame["state"]

In [None]:
# attribute access only works when the column name is a valid Python identifier that does not
# clash with an existing DataFrame attribute (e.g. frame.pop is the pop() method, not the "pop" column)
frame.state

In [None]:
# rows can be retrieved with the .iloc (integer position) and .loc (label) indexers
frame2.iloc[0]

In [None]:
# .loc selects by index label; frame uses the default integer index, so label 1 is the second row
frame.loc[1]

In [None]:
# Assigning an array to a column label fills that column with the array's values.
frame2["debt"] = np.arange(5, dtype=float)
frame2

In [None]:
# When a Series is assigned to a column, its index is aligned with the DataFrame's index;
# labels missing from the Series become missing values.
# Assigning to a column that does not exist creates that column.
# Columns can be deleted with the del keyword. To demonstrate, we first add a boolean column
# marking the rows whose state is "Parana".
frame["Boolean"] = frame["state"].eq("Parana")
frame

In [None]:
# remove the "Boolean" column from frame in place
del frame["Boolean"]

In [None]:
# display frame again to confirm the column was removed
frame

In [None]:
# A DataFrame can be transposed with the .T attribute. Note, however, that if the columns do not all share the same dtype, the original dtypes are lost and the transposed columns have dtype object.

In [None]:
# With a nested dictionary, the outer keys become the column labels and the inner keys become
# the row index; an inner key missing from one column (2000 for "Nevada") yields a missing value.
populations = {
    "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6},
    "Nevada": {2001: 2.4, 2002: 2.9},
}
DataFrame(data=populations)

In [None]:
# if an index is passed explicitly, it is used for the rows instead of the inner dictionary keys;
# labels absent from the inner dictionaries (2003 here) are filled with missing values
frame3 = DataFrame(populations, index=[2001, 2002, 2003])

In [None]:
# give the columns axis a name
frame3.columns.name = "states"

In [None]:
# the column labels are themselves an Index object, which now carries the name set above
frame3.columns

In [None]:
# display the frame built with the explicit row index
frame3

In [None]:
# Select rows 2003 and 2001 (in that order) and the "Ohio" column by label.
# An explicit copy makes frame4 an independent object, so the assignment below cannot
# affect frame3 and does not depend on pandas' implicit view/copy rules.
frame4 = frame3.loc[[2003, 2001], ["Ohio"]].copy()
# Set a single cell of the copy by row and column label.
frame4.loc[2003, "Ohio"] = 4.7
# frame3 is displayed to show that it is unchanged.
frame3

In [None]:
# frame4 holds the modified selection
frame4

In [None]:
# build the row labels of frame3 without 2002, then reindex to keep only those rows
indexes_to_keep = frame3.index.difference([2002])
frame5 = frame3.reindex(indexes_to_keep)
frame5

### Dropping Entries from an Axis example

In [None]:
# A 4x4 frame holding the integers 0..15, with state names as row labels and
# number words as column labels.
# NOTE: this rebinds `data`, which earlier in the notebook held the dict used to build `frame`.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=["Ohio", "Colorado", "Utah", "New York"],
    columns=["one", "two", "three", "four"],
)
data

In [None]:
# drop() returns a new frame without the given row labels; data itself is not modified
data.drop(index=["Colorado", "Ohio"])

In [None]:
# the columns keyword drops column labels instead of row labels
data.drop(columns=["two"])

In [None]:
# alternative spelling: pass the label and axis=1 (the columns axis) instead of the columns keyword
data.drop("three", axis=1)