In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xlrd

# Creating Dataframes

### From a list

In [12]:
# Row-oriented input is a list of lists:
#   - every inner list is one row
#   - every element of an inner list is the value of one column
data = [["Felipe", 40, 30]]
df = pd.DataFrame(data, columns=["Name", "Age", "Salary"])

In [13]:
df.head()

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30


In [14]:
# Ok! Let's create a new row.
# A new list is built instead of calling data.append(): append mutates `data`
# in place, so running this cell a second time would add Helena twice.
rows = data + [["Helena", 40, 40]]
df = pd.DataFrame(rows, columns=["Name", "Age", "Salary"])

In [15]:
df.head()

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30
1,Helena,40,40


### From a dictionary

In [16]:
# Column-oriented input: each key is a column name and each value is the
# list of that column's entries.
data = dict(
    Name=["Felipe", "Helena"],
    Age=[40, 40],
    Salary=[30, 40],
)
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30
1,Helena,40,40


In [17]:
# So far the index has simply been an incrementing integer.
# When the rows come from a database it is better to use each record's id
# as its index label.
data = {"Name": ["Felipe", "Helena"], "Age": [40, 40], "Salary": [30, 40]}
df = pd.DataFrame(data=data, index=["id1", "id2"])
df.head()

Unnamed: 0,Name,Age,Salary
id1,Felipe,40,30
id2,Helena,40,40


### Add and delete columns

In [26]:
# A new column is added as a Series (not as another DataFrame), assigned to
# df["<column name>"].
# pandas lines the Series up with the DataFrame by index label, so when the
# DataFrame uses a custom index the Series must be created with the same
# labels (here "id1" and "id2").
df["Company"] = pd.Series(["Pumatronix", "Aliança"], index=["id1", "id2"])

In [27]:
df.head()

Unnamed: 0,Name,Age,Salary,Company
id1,Felipe,40,30,Pumatronix
id2,Helena,40,40,Aliança


In [28]:
# Same records once more, this time without a custom index, so the rows are
# labelled with the default integers.
data = {"Name": ["Felipe", "Helena"], "Age": [40, 40], "Salary": [30, 40]}
df = pd.DataFrame(data=data)

In [29]:
df["Company"] = pd.Series(["Pumatronix", "Aliança"])

In [30]:
df.head()

Unnamed: 0,Name,Age,Salary,Company
0,Felipe,40,30,Pumatronix
1,Helena,40,40,Aliança


Note that when you don't change the index, the values of the new Series are placed in order, one per row.

In [31]:
# `del` is a statement, so no parentheses are needed.
# This removes the column from df itself (in place).
del df["Company"]

In [32]:
df

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30
1,Helena,40,40


In [33]:
# drop() returns a new DataFrame without the column; df itself is left
# untouched because the result is not assigned back.
df.drop(columns="Salary")

Unnamed: 0,Name,Age
0,Felipe,40
1,Helena,40


In [34]:
df

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30
1,Helena,40,40


### Selecting rows

In [36]:
# Add one more record with pd.concat.
# newReg gets the default index, so its only row is labelled 0; after the
# concat the combined frame therefore contains the label 0 twice.
newReg = pd.DataFrame([["João", 20, 10]], columns=["Name", "Age", "Salary"])
df = pd.concat([df, newReg])

In [37]:
df.head()

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30
1,Helena,40,40
0,João,20,10


Let's create a class to add rows to and remove rows from a DataFrame.

In [108]:
class IncrivelFabrica:
    """Small registry of people stored in a pandas DataFrame.

    Each row has the columns ``Name``, ``Age`` and ``Salary`` and is labelled
    with an integer index assigned by :meth:`add`. A label is never handed out
    twice: deleting a row does not free its label for reuse.
    """

    # Column layout shared by the empty frame and by every added row.
    COLUMNS = ["Name", "Age", "Salary"]

    def __init__(self, name):
        """Create an empty registry.

        Parameters
        ----------
        name
            Label for this registry; kept on ``self.name``.
        """
        self.name = name
        self.df = pd.DataFrame(data=[], columns=self.COLUMNS)
        # Despite its name, this list records every index label that has
        # already been assigned. delete() does not remove entries from it,
        # which is what prevents add() from reusing a deleted row's label.
        self.availableIds = []

    def head(self, n=5):
        """Return the first ``n`` rows of the registry (5 by default)."""
        return self.df.head(n)

    def add(self, name, age, salary):
        """Append one row and return the index label it was given.

        The label starts at the current number of rows and is incremented
        until it is one that has never been assigned before.
        """
        index = len(self.df)
        while index in self.availableIds:
            index += 1
        self.availableIds.append(index)

        newReg = pd.DataFrame(
            data=[[name, age, salary]],
            columns=self.COLUMNS,
            index=[index],
        )
        if self.df.empty:
            # Concatenating onto an empty frame is deprecated in recent
            # pandas (FutureWarning) and the resulting dtypes differ between
            # versions, so the first row simply becomes the frame.
            self.df = newReg
        else:
            self.df = pd.concat([self.df, newReg])
        return index

    def delete(self, index):
        """Remove the row labelled ``index``.

        The error raised by ``DataFrame.drop`` for an unknown label is not
        caught here. The label stays recorded in ``availableIds``.
        """
        self.df = self.df.drop(index)

    def getDf(self):
        """Return the underlying DataFrame (the object itself, not a copy)."""
        return self.df

In [109]:
oi = IncrivelFabrica("oi")
oi.head()

Unnamed: 0,Name,Age,Salary


In [110]:
oi.add("Felipe", 40, 30)
oi.head()

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30


In [111]:
oi.add("Helena", 40, 40)
oi.head()

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30
1,Helena,40,40


In [112]:
oi.add("João", 20, 10)
oi.head()

Unnamed: 0,Name,Age,Salary
0,Felipe,40,30
1,Helena,40,40
2,João,20,10


In [113]:
oi.delete(0)
oi.head()

Unnamed: 0,Name,Age,Salary
1,Helena,40,40
2,João,20,10


In [114]:
oi.delete(2)
oi.head()

Unnamed: 0,Name,Age,Salary
1,Helena,40,40


In [115]:
oi.add("Felipe", 40, 30)
oi.add("João", 20, 10)
oi.head()

Unnamed: 0,Name,Age,Salary
1,Helena,40,40
3,Felipe,40,30
4,João,20,10


In [116]:
oi.add("Satanás", 10, 3)
oi.add("Isaías", 20, 1)
oi.head()

Unnamed: 0,Name,Age,Salary
1,Helena,40,40
3,Felipe,40,30
4,João,20,10
5,Satanás,10,3
6,Isaías,20,1


In [117]:
df = oi.getDf()

In [118]:
df.head()

Unnamed: 0,Name,Age,Salary
1,Helena,40,40
3,Felipe,40,30
4,João,20,10
5,Satanás,10,3
6,Isaías,20,1


In [119]:
df.iloc[1]

Name      Felipe
Age           40
Salary        30
Name: 3, dtype: object

In [120]:
df.iloc[0]

Name      Helena
Age           40
Salary        40
Name: 1, dtype: object

In [126]:
df[1:2]

Unnamed: 0,Name,Age,Salary
3,Felipe,40,30


In [127]:
df[0:1]

Unnamed: 0,Name,Age,Salary
1,Helena,40,40
