# Pandas: Intermediate (Part 1)

## First Steps with Pandas Series

In [None]:
import pandas as pd

In [None]:
# Load titanic.csv from the current working directory into a DataFrame.
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic

In [None]:
# Column names, non-null counts, dtypes and memory usage.
titanic.info()

In [None]:
# Bracket selection of a single column label (the next cell checks the resulting type).
titanic["age"]

In [None]:
type(titanic["age"])

In [None]:
# Bracket access and attribute access select the same column.
titanic["age"].equals(titanic.age)

In [None]:
# Note: no .copy() here, so `age` is not guaranteed to be independent of `titanic`.
age = titanic["age"]

In [None]:
# First two elements.
age.head(2)

In [None]:
# Last five elements (the default n is 5).
age.tail()

In [None]:
age.dtype

In [None]:
# A Series is one-dimensional, so shape is a 1-tuple.
age.shape

In [None]:
len(age)

In [None]:
age.index

In [None]:
# NOTE(review): Series.info() was added in pandas 1.4; on older versions
# use the to_frame().info() form in the next cell.
age.info()

In [None]:
# Convert the Series to a one-column DataFrame to get the DataFrame-style summary.
age.to_frame().info()

## Analyzing Numerical Series

In [None]:
age

In [None]:
# For numeric data: count, mean, std, min, quartiles and max of the non-missing values.
age.describe()

In [None]:
# count() excludes missing values ...
age.count()

In [None]:
# ... while size and len() count every element, missing or not.
age.size

In [None]:
len(age)

In [None]:
# skipna=False lets missing values propagate: the result is NaN if any element is NaN.
age.sum(skipna = False)

In [None]:
# The built-in sum() does not skip NaN either.
sum(age)

In [None]:
# mean/median/std/min/max skip missing values by default (skipna=True).
age.mean()

In [None]:
age.median()

In [None]:
age.std()

In [None]:
age.min()

In [None]:
age.max()

In [None]:
# Distinct values in order of appearance; NaN is included if present.
age.unique()

In [None]:
len(age.unique())

In [None]:
# dropna=False counts NaN as a distinct value, so this matches len(age.unique()).
age.nunique(dropna = False)

In [None]:
# Frequency of each distinct value.
# Defaults: sort=True, ascending=False, dropna=True, normalize=False.
age.value_counts()

In [None]:
# sort=True orders the result by frequency (the default).
age.value_counts(sort = True)

In [None]:
# sort=False skips the ordering by frequency.
age.value_counts(sort = False)

In [None]:
# dropna=True leaves NaN out of the result (the default).
age.value_counts(dropna = True)

In [None]:
# dropna=False adds a row with the number of missing values.
age.value_counts(dropna = False)

In [None]:
# Most frequent value first (the default).
age.value_counts(ascending = False)

In [None]:
# Least frequent value first.
age.value_counts(ascending = True)

In [None]:
# Same as age.value_counts(): all defaults spelled out.
age.value_counts(sort = True, dropna = True, ascending = False, normalize = False)

In [None]:
# normalize=True returns relative frequencies instead of counts.
age.value_counts(sort = True, dropna = True, ascending = False, normalize = True)

In [None]:
# Manual cross-check of the relative frequency above. The 30 is presumably a
# count read off an earlier value_counts() output -- verify against the data.
# With dropna=True the denominator is the number of non-missing values.
30/age.count()

In [None]:
# With dropna=False, NaN is counted too and the denominator is the full length.
age.value_counts(sort = True, dropna = False, ascending = False, normalize = True)

In [None]:
# Same cross-check with the full length (missing values included) as denominator.
30/age.size

In [None]:
# bins=5 groups the values into 5 intervals and counts per interval (numeric data only).
age.value_counts(sort = True, dropna = True, ascending= False, normalize = False, bins = 5)

In [None]:
# Relative frequency per interval, using 10 intervals.
age.value_counts(sort = True, dropna = True, ascending= False, normalize = True, bins = 10)

## Analyzing non-numerical Series

In [None]:
import pandas as pd

In [None]:
# Load summer.csv from the current working directory into a DataFrame.
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
summer.info()

In [None]:
# Select the "Athlete" column for the non-numerical examples below.
athlete = summer["Athlete"]

In [None]:
athlete.head()

In [None]:
athlete.tail(5)

In [None]:
type(athlete)

In [None]:
athlete.dtype

In [None]:
athlete.shape

In [None]:
# For object (string) data describe() reports count, unique, top and freq
# instead of numeric statistics -- assumes "Athlete" holds strings; see dtype above.
athlete.describe()

In [None]:
# size counts every element; count() only the non-missing ones.
athlete.size

In [None]:
athlete.count()

In [None]:
# Presumably the alphabetically first name, since strings compare lexicographically.
athlete.min()

In [None]:
athlete.unique()

In [None]:
len(athlete.unique())

In [None]:
# dropna=False counts NaN as a distinct value, so this matches len(athlete.unique()).
athlete.nunique(dropna= False)

In [None]:
# Number of rows per athlete, most frequent first.
athlete.value_counts()

In [None]:
# Least frequent athletes first.
athlete.value_counts(sort = True, ascending=True)

In [None]:
# Share of all non-missing rows for the five most frequent athletes.
athlete.value_counts(sort = True, ascending=False, normalize = True).head()

## The copy() method

In [None]:
import pandas as pd

In [None]:
# Reload the data so this section starts from an unmodified DataFrame.
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
# copy() gives `age` its own data, so changes to it do not touch `titanic`.
age = titanic.age.copy()

In [None]:
age.head()

In [None]:
# Overwrite the third element (integer position 2) of the copy.
age.iloc[2] = 29

In [None]:
# The copy shows the new value ...
age.head()

In [None]:
# ... while the original DataFrame is unchanged.
titanic.head()

## Sorting and Introduction to the `inplace` Parameter

In [None]:
import pandas as pd

In [None]:
# Example data with deliberately unordered integer keys; key 7 has no value.
dic = {
    1: 10,
    3: 25,
    2: 6,
    4: 36,
    5: 2,
    6: 0,
    7: None,
}
dic

In [None]:
# Dict keys become the index labels and dict values the data; None is stored as NaN.
sales = pd.Series(dic)
sales

In [None]:
# Without inplace=True a new, sorted Series is returned and `sales` stays as it was.
sales.sort_index()

In [None]:
# inplace=True sorts `sales` itself; the call returns None, so the cell shows no output.
sales.sort_index(ascending = True, inplace= True)

In [None]:
sales

In [None]:
# inplace=False (the default) returns a sorted copy; ascending order, NaN placed last.
sales.sort_values(inplace=False)

In [None]:
# Sort `sales` itself from largest to smallest, keeping NaN at the end.
sales.sort_values(ascending=False, na_position="last", inplace= True)

In [None]:
sales

In [None]:
# Example data keyed by weekday abbreviation, in calendar order.
dic = {
    "Mon": 10,
    "Tue": 25,
    "Wed": 6,
    "Thu": 36,
    "Fri": 2,
}
dic

In [None]:
# Rebuild `sales` from the weekday dict: the index labels are now strings.
sales = pd.Series(dic)

In [None]:
sales

In [None]:
# String labels sort lexicographically, so this is reverse alphabetical
# order (Wed, Tue, Thu, Mon, Fri), not reverse weekday order.
sales.sort_index(ascending=False)

## First Steps with Pandas Index Objects

In [None]:
import pandas as pd

In [None]:
# index_col="Athlete" uses that column as the row index instead of the default 0..n-1.
summer = pd.read_csv("summer.csv", index_col="Athlete")

In [None]:
summer.head()

In [None]:
summer.tail()

In [None]:
summer.info()

In [None]:
# Row labels.
summer.index

In [None]:
type(summer.index)

In [None]:
# Column labels.
summer.columns

In [None]:
type(summer.columns)

In [None]:
# List holding the row index followed by the column index.
summer.axes

In [None]:
# Index objects support positional indexing and slicing like a list.
summer.columns[:3]

In [None]:
summer.index[0]

In [None]:
summer.index[-1]

In [None]:
summer.index[100:102]

In [None]:
# Convert the column labels to a plain Python list.
summer.columns.tolist()

In [None]:
# True only if no row label occurs more than once.
summer.index.is_unique

In [None]:
# Position(s) of a label: an int if the label is unique, otherwise a slice
# or a boolean mask covering every occurrence.
summer.index.get_loc("DRIVAS, Dimitrios")

## Changing Row Index Labels

In [None]:
import pandas as pd

In [None]:
# Reload with "Athlete" as the row index.
summer = pd.read_csv("summer.csv", index_col="Athlete")

In [None]:
summer.head()

In [None]:
summer.index

In [None]:
# drop=False moves the current index back into a regular column; the rows get
# the default 0..n-1 index again. inplace=True modifies `summer` itself.
summer.reset_index(drop = False, inplace=True)

In [None]:
summer.head()

In [None]:
# Make "Year" the row index; drop=True removes it from the regular columns.
summer.set_index("Year", drop = True, inplace = True)

In [None]:
summer.head()

In [None]:
summer.index.is_unique

In [None]:
# Intentionally commented out: Index objects are immutable, so assigning to a
# single position raises TypeError.
#summer.index[0] = 1894

In [None]:
# Intentionally commented out: the index must be a list-like with one label per
# row, so assigning a single string raises an error as well.
#summer.index = "Before 2016"

In [None]:
# Number of row labels, i.e. the number of rows.
summer.index.size

In [None]:
# One label per row, numbered from 1: "Medal_No1", "Medal_No2", ...
new_index = [f"Medal_No{position}" for position in range(1, len(summer.index) + 1)]
new_index

In [None]:
# Replacing the whole index at once is allowed; the list needs one label per row.
summer.index = new_index

In [None]:
summer.head()

In [None]:
summer.tail()

In [None]:
summer.index.is_unique

In [None]:
# Name the index; the name becomes the column label when the index is reset.
summer.index.name = "Medal_No"

In [None]:
# No inplace=True: this returns a new DataFrame with "Medal_No" as a regular
# column and leaves `summer` unchanged.
summer.reset_index()

## Changing Column Labels

In [None]:
import pandas as pd

In [None]:
# Reload the data with its original column labels.
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.tail()

In [None]:
titanic.columns

In [None]:
titanic.columns[0]

In [None]:
# Intentionally commented out: Index objects are immutable, so a single
# column label cannot be changed by item assignment (raises TypeError).
#titanic.columns[0] = "Alive"

In [None]:
# Replace all column labels at once. The list must have one entry per column
# -- assumes titanic.csv has exactly nine columns; verify against the file.
titanic.columns = ["Alive", "Class", "Sex", "Age", "SibSp", "ParChi", "Fare", "Emb", "Deck"]

In [None]:
titanic.head()

In [None]:
# Current name of the column index.
titanic.columns.name

In [None]:
# Give the column index a name.
titanic.columns.name = "Pass_Charact"

In [None]:
titanic.head()

In [None]:
# Give the row index a name as well.
titanic.index.name = "Passenger_no"

## Renaming Index & Column Labels

In [None]:
import pandas as pd

In [None]:
# Reload with "Athlete" as the row index.
summer= pd.read_csv("summer.csv", index_col = "Athlete")

In [None]:
summer.head()

In [None]:
# Intentionally commented out: Index objects are immutable, so a single
# label cannot be changed by item assignment (raises TypeError).
#summer.index[0] = 'HAYOS, Alfred'

In [None]:
# rename() maps old labels to new ones. Every row labelled "HAJOS, Alfred"
# is renamed; labels not in the mapping are left as they are.
summer.rename({"HAJOS, Alfred":'HAYOS, Alfred'}, axis = "index", inplace= True)

In [None]:
summer.head()

In [None]:
# The same mechanism for column labels; several labels can be renamed in one call.
summer.rename({"Gender":'Sex', "City":"Host_City"}, axis = "columns", inplace=True)

In [None]:
summer.head()