In [None]:
# A DataFrame is a two-dimensional labeled array: it has labels on both the row axis and the column axis

In [2]:
import pandas as pd

In [4]:
# Build three Series, one per student; each will become a row of the DataFrame
record1 = pd.Series(
    data={'Name': 'Alice', 'Class': 'Physics', 'Score': 85},
)
record2 = pd.Series(
    data={'Name': 'Jack', 'Class': 'Chemistry', 'Score': 82},
)
record3 = pd.Series(
    data={'Name': 'Helen', 'Class': 'Biology', 'Score': 90},
)

In [5]:
# As with the Series constructor, the individual items are passed in as a list
# and the index labels are supplied through the second argument, index
df = pd.DataFrame(
    data=[record1, record2, record3],
    index=['school1', 'school2', 'school1'],
)
df.head()

Unnamed: 0,Name,Class,Score
school1,Alice,Physics,85
school2,Jack,Chemistry,82
school1,Helen,Biology,90


In [7]:
# An alternative is to use a list of dictionaries, where each dictionary represents a row of data
students = [
    {'Name': 'Alice', 'Class': 'Physics', 'Score': 85},
    {'Name': 'Jack', 'Class': 'Chemistry', 'Score': 82},
    {'Name': 'Helen', 'Class': 'Biology', 'Score': 90},
]

df = pd.DataFrame(
    data=students,
    index=['school1', 'school2', 'school1'],
)
df.head()

Unnamed: 0,Name,Class,Score
school1,Alice,Physics,85
school2,Jack,Chemistry,82
school1,Helen,Biology,90


In [9]:
# As with a Series, we can extract data using the .iloc and .loc attributes.

# Selecting a single row by its label with .loc returns that row as a Series
# (the label 'school2' appears only once in the index)
df.loc['school2']

Name          Jack
Class    Chemistry
Score           82
Name: school2, dtype: object

In [10]:
# It's important to remember that if more than one row shares the same label, all of them are returned as a DataFrame
df.loc['school1']

Unnamed: 0,Name,Class,Score
school1,Alice,Physics,85
school1,Helen,Biology,90


In [13]:
# To access data along both axes at once, pass the row label and the column label to a single indexing operator
df.loc['school1', 'Name']

school1    Alice
school1    Helen
Name: Name, dtype: object

In [14]:
# To access a column, we can transpose the DataFrame and then use the .loc attribute with the column label
df.T.loc['Name']

school1    Alice
school2     Jack
school1    Helen
Name: Name, dtype: object

In [15]:
# We can also use the indexing operator directly with the column label
df['Name']

school1    Alice
school2     Jack
school1    Helen
Name: Name, dtype: object

In [16]:
# Another way of accessing data along multiple axes is to chain indexing operations together:
# first select the rows by label, then select the column from that result
df.loc['school1']['Name']

school1    Alice
school1    Helen
Name: Name, dtype: object

In [None]:
# *NOTE*: When chaining indexing operations, an intermediate copy of the data may be created. This makes the operation
# slower than necessary and, if you are changing data, it can be a source of errors. Avoid chaining when possible.

In [17]:
# We can also slice a DataFrame along both axes with .loc
df.loc[:, ['Name','Score']] # all rows, only the columns labeled Name and Score

Unnamed: 0,Name,Score
school1,Alice,85
school2,Jack,82
school1,Helen,90


In [19]:
# We can use the drop function to delete rows and columns, but keep in mind that it returns a
# modified copy of the DataFrame, while the original remains unchanged
df.drop('school1')

Unnamed: 0,Name,Class,Score
school2,Jack,Chemistry,82


In [20]:
# Displaying df again shows that the drop call above did not modify the original DataFrame
df

Unnamed: 0,Name,Class,Score
school1,Alice,Physics,85
school2,Jack,Chemistry,82
school1,Helen,Biology,90


In [23]:
# drop has two optional parameters, inplace and axis:
# - if inplace is set to True, the DataFrame is modified directly and no copy is created
# - if axis is 0 we drop rows; if it is 1 we drop columns

# Example: work on a copy so that df itself keeps all of its columns
copy_df = df.copy()

copy_df.drop('Name', inplace = True, axis = 1)
copy_df

Unnamed: 0,Class,Score
school1,Physics,85
school2,Chemistry,82
school1,Biology,90


In [24]:
# We can also drop a column using the indexing operator and the del keyword, which modifies the DataFrame directly
del copy_df['Class']
copy_df

Unnamed: 0,Score
school1,85
school2,82
school1,90


In [25]:
# Adding a new column to a DataFrame is as easy as assigning a value to it with the indexing operator;
# the single value None is assigned to every row
copy_df['ClassRanking'] = None
copy_df

Unnamed: 0,Score,ClassRanking
school1,85,
school2,82,
school1,90,


In [None]:
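# If we want to add new rows, we can either assign through the loc attribute or append them to a copy
# (the append() method in older pandas versions; pd.concat in pandas 2.0 and later, where append() was removed)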

In [27]:
# Appending a row creates a new DataFrame that contains the extra row; copy_df itself is not modified.
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0, so pd.concat is used instead.
# Columns missing from one of the frames are filled with missing values, and sort=True orders the
# combined columns alphabetically.
new_record = pd.DataFrame({'Name': 'Carl', 'Class': 'Python', 'Score': 72}, index = ['school3'])
pd.concat([copy_df, new_record], sort=True)

Unnamed: 0,Class,ClassRanking,Name,Score
school1,,,,85
school2,,,,82
school1,,,,90
school3,Python,,Carl,72


In [34]:
# Assigning through .loc to a label that is not yet in the index adds a new row and
# modifies the original DataFrame directly; the list supplies one value per column
copy_df.loc['school3'] = [87, None]
copy_df

Unnamed: 0,Score,ClassRanking
school1,85,
school2,82,
school1,90,
school3,87,
