# pandas: Intermediate (Part 1)

## First Steps with Pandas Series

### Importing pandas and NumPy

![NumPy logo](../Assets/numpy_logo.png)

The next cell imports the pandas and NumPy packages.

NumPy is a Python library for working with arrays. It also provides functions for several areas of mathematics, such as linear algebra and matrix operations.

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the Titanic dataset from a CSV file into a DataFrame.
titanic = pd.read_csv("../Data/titanic.csv")

In [None]:
# Preview the first rows (head() shows 5 rows unless told otherwise).
titanic.head()

In [None]:
# Selecting a single column with square brackets returns a pandas Series.
age=titanic["age"]

In [None]:
# Only the first two entries.
age.head(n=2)

In [None]:
# The last rows (5 by default).
age.tail()

In [None]:
# Data type of the values stored in the Series.
age.dtype

In [None]:
# NOTE(review): left disabled by the author - presumably because Series.info()
# is not available in older pandas releases; confirm before re-enabling.
#age.info()

In [None]:
# For a Series the shape is a one-element tuple: (number_of_entries,).
age.shape

In [None]:
# Number of entries; missing values are counted as well.
len(age)

In [None]:
# The row labels of the Series.
age.index

In [None]:
# Summary statistics of the Series in a single call.
age.describe()

## Analyzing Numerical Series

In [None]:
# Confirm the Series holds a numeric data type before computing statistics.
age.dtype

In [None]:
# count() counts only the non-missing values ...
age.count()

In [None]:
# ... whereas size is the total number of entries, missing ones included ...
age.size

In [None]:
# ... which is also what the built-in len() reports.
len(age)

In [None]:
# The pandas sum ignores missing values (skipna=True is the default).
age.sum(skipna=True)

In [None]:
# Python's built-in sum() has no NaN handling: if the Series contains a single
# NaN, the result is NaN. Compare with age.sum() in the previous cell.
sum(age)

In [None]:
# Arithmetic mean of the non-missing values.
age.mean(skipna = True)

In [None]:
# Sample standard deviation (pandas normalises by N-1 by default).
age.std()

In [None]:
# Smallest value.
age.min()

In [None]:
# Largest value.
age.max()

In [None]:
# Median (50th percentile).
age.median()

In [None]:
# Array of the distinct values in order of first appearance; NaN is listed
# as a value of its own if it occurs.
age.unique()

In [None]:
# Number of distinct values as reported by unique(), i.e. counting NaN if present.
len(age.unique())

In [None]:
# dropna=False counts NaN as one additional distinct value ...
print( age.nunique(dropna = False) )

# ... dropna=True (the default) ignores it.
print( age.nunique(dropna = True) )

In [None]:
# Frequency of each distinct value, most frequent first; NaN is excluded by default.
age.value_counts()

In [None]:
# Same, but with the number of missing values included as its own row.
age.value_counts(dropna = False)

In [None]:
# ascending=True puts the least frequent values first.
age.value_counts(dropna = True, sort = True, ascending = True)

In [None]:
# normalize=True returns relative frequencies (proportions) instead of counts.
age.value_counts(dropna = True, sort = True, ascending = False, normalize = True)

In [None]:
# bins=5 groups the values into 5 intervals and counts the entries per interval
# instead of per distinct value (only works for numeric data).
age.value_counts(dropna = True, sort = True, ascending= False, bins = 5).head()

## Analyzing non-numerical Series

In [None]:
# Load the second dataset used in this notebook from a CSV file.
summer = pd.read_csv("../Data/summer.csv")

In [None]:
# Preview the first rows.
summer.head()

In [None]:
# Overview of the DataFrame: columns, non-null counts and data types.
summer.info()

In [None]:
# Extract the "Athlete" column as a Series.
athlete = summer["Athlete"]

In [None]:
# First entries of the Series.
athlete.head()

In [None]:
# Confirm that a single selected column is a pandas Series.
type(athlete)

In [None]:
# One-element tuple: (number_of_entries,).
athlete.shape

In [None]:
# For non-numeric data describe() reports count, number of unique values,
# the most frequent value (top) and its frequency (freq).
athlete.describe()

In [None]:
# Total number of entries, missing values included.
athlete.size

In [None]:
# Number of non-missing entries.
athlete.count()

In [None]:
# NOTE(review): assuming the column holds strings, min() returns the value that
# sorts first lexicographically - confirm against the data.
athlete.min()

In [None]:
# Array of the distinct values in order of first appearance.
athlete.unique()

In [None]:
# Number of distinct values as reported by unique().
len(athlete.unique())

In [None]:
# Number of distinct values; dropna=False counts NaN as a value of its own.
athlete.nunique(dropna= False)

In [None]:
# Number of occurrences per distinct value, most frequent first.
athlete.value_counts()

In [None]:
# Same counts, least frequent first.
athlete.value_counts(sort = True, ascending=True)

## Sorting and the `inplace` Parameter

In [None]:
# Small example data: the keys are deliberately out of order and one value is
# missing (None), so that sorting and NaN placement can be demonstrated.
dic = {1:10, 3:25, 2:6, 4:36, 5:2, 6:0, 7:None}
dic

In [None]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
sales = pd.Series(dic)
sales

In [None]:
# Returns a new Series sorted by index label; `sales` itself is not modified.
sales.sort_index()

##### Inplace

`inplace` is a parameter accepted by many pandas methods. Its default value is `False`: the method leaves the original object unchanged and returns a modified copy. When `inplace=True`, the changes are applied directly to the original object and the method returns `None`.

In [None]:
# inplace=True sorts `sales` itself by index label; the call returns None,
# so this cell produces no output.
sales.sort_index(ascending = True, inplace= True)

In [None]:
# Display the Series to see that it has been reordered.
sales

In [None]:
# inplace=False (the default) returns a copy sorted by value and leaves
# `sales` untouched.
sales.sort_values(inplace=False)

In [None]:
# Sort `sales` itself by value, largest first; na_position="last" places
# missing values at the end regardless of the sort direction.
sales.sort_values(ascending=False, na_position="last", inplace= True)

In [None]:
# Display the Series to see the new order.
sales

In [None]:
# New example data with string labels.
dic = {"Mon":10, "Tue":25, "Wed":6, "Thu": 36, "Fri": 2}
dic

In [None]:
# Rebinds `sales` to a new Series built from the weekday dict.
sales = pd.Series(dic)

In [None]:
sales

In [None]:
# String labels are sorted lexicographically (here in descending order),
# not in weekday order. Not in place: a sorted copy is returned.
sales.sort_index(ascending=False)

In [None]:
# Replace the current index with a fresh integer index. With drop=False the old
# index is kept as a regular column instead of being discarded.
summer.reset_index(drop = False, inplace=True)

In [None]:
summer.head()

In [None]:
# Use the "Year" column as the new index; drop=True removes it from the
# regular columns.
summer.set_index("Year", drop = True, inplace = True)

In [None]:
summer.head()

In [None]:
# True only if no index label occurs more than once.
summer.index.is_unique

In [None]:
# Number of index labels, i.e. number of rows.
summer.index.size

In [None]:
# Build one label per row: "Medal_No1", "Medal_No2", ...
new_index = ["Medal_No{}".format(i) for i in range(1,summer.index.size+1)]
new_index

In [None]:
# The new labels are held in a plain Python list.
type(new_index)

In [None]:
# Assigning to .index replaces all row labels at once; the new labels must
# provide exactly one entry per row.
summer.index = new_index

In [None]:
summer.head()

In [None]:
# Check uniqueness again for the newly assigned labels.
summer.index.is_unique

In [None]:
# Give the index itself a name.
summer.index.name = "Medal_No"

In [None]:
# Without inplace=True this only displays a copy in which the index has been
# turned back into a regular column; `summer` itself is unchanged.
summer.reset_index()

## Changing Column Labels

In [None]:
# Reload the dataset so this section starts from the labels found in the CSV file.
titanic = pd.read_csv("../Data/titanic.csv")

In [None]:
titanic.head()

In [None]:
# Label of the first column.
titanic.columns[0]

In [None]:
# Left commented out on purpose: a pandas Index is immutable, so assigning to a
# single position raises a TypeError.
# titanic.columns[0] = "Alive"

In [None]:
# Replace all column labels at once; the list must contain exactly one label
# per column, in column order.
titanic.columns = ["Alive", "Class", "Sex", "Age", "SibSp", "ParChi", "Fare", "Emb", "Deck"]

In [None]:
titanic.head()

In [None]:
# Sets the name of the column axis (not the label of any single column).
titanic.columns.name = "Passenger_no"

In [None]:
titanic.head()

In [None]:
# Sets the name of the row index.
titanic.index.name = "Passenger_no"

## Renaming Index & Column Labels

In [None]:
# Reload the dataset, this time using the "Athlete" column as the row index.
summer= pd.read_csv("../Data/summer.csv", index_col = "Athlete")

In [None]:
summer.head()

In [None]:
# Left commented out on purpose: a pandas Index is immutable, so a single label
# cannot be changed by item assignment (raises a TypeError).
#summer.index[0] = 'HAYOS, Alfred'

In [None]:
# rename() with a {old: new} mapping relabels every matching index label;
# labels that are not in the mapping stay as they are.
summer.rename({"HAJOS, Alfred":'HAYOS, Alfred'}, axis = "index", inplace= True)

In [None]:
summer.head()

In [None]:
# The same mechanism applied to column labels (axis="columns").
summer.rename({"Gender":'Sex', "City":"Host_City"}, axis = "columns", inplace=True)

In [None]:
summer.head()