# **Pandas Series and Index Objects**

### **Analyzing Numerical Series with unique(), nunique() and value_counts()**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic

In [None]:
# Call info() so the summary is printed; without the parentheses the cell
# only displays the bound method object.
titanic.info()

In [None]:
age = titanic['age']

In [None]:
type(age)

In [None]:
age.equals(titanic.age)

In [None]:
age.dtype

In [None]:
age.head()

In [None]:
age.tail()

In [None]:
age.shape

In [None]:
len(age)

In [None]:
age_frame = age.to_frame()

In [None]:
age_frame.shape

In [None]:
age.describe()

In [None]:
age.info()

In [None]:
age.count()

In [None]:
age.size

In [None]:
age.sum()

In [None]:
sum(age)

In [None]:
age.mean()

In [None]:
# Manual mean for comparison with age.mean() above. len(age) counts every row,
# so this presumably differs from age.mean() when the column contains missing
# values (pandas reductions skip NaN by default) — compare with age.count().
age.sum() / len(age)

In [None]:
age.min()

In [None]:
age.max()

In [None]:
age.std()

Playground: print the mean of every numeric column:

In [None]:
# Print the mean of every numeric column.
# select_dtypes(include='number') picks up all numeric dtypes (int32, float32,
# nullable Int64, ...) instead of only the hard-coded 'int64'/'float64' pair.
for col in titanic.select_dtypes(include='number').columns:
    print(f'{col}: {titanic[col].mean()}')

In [None]:
age.unique()

In [None]:
len(age.unique())

In [None]:
age.nunique(dropna = False)

In [None]:
age.value_counts()

In [None]:
age.value_counts(normalize = True)

In [None]:
age.value_counts(bins = 5)

In [None]:
30/age.count()

### **Analyzing non-numerical Series with unique(), nunique(), value_counts()**

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv('summer.csv')

In [None]:
summer.head()

In [None]:
summer.info()

In [None]:
athlete = summer['Athlete']

In [None]:
athlete.head(10)

In [None]:
athlete.tail(10)

In [None]:
athlete.dtype

In [None]:
type(athlete)

In [None]:
athlete.shape

In [None]:
athlete.describe()

In [None]:
athlete.size

In [None]:
athlete.count()

In [None]:
athlete.min()

In [None]:
athlete.unique()

In [None]:
len(athlete.unique())

In [None]:
athlete.nunique()

In [None]:
athlete.value_counts()

In [None]:
athlete.value_counts(sort = True, ascending = True)

In [None]:
athlete.value_counts(sort = True, normalize = True).head()

### **Creating Pandas Series**

#### **Part 1.**

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv('summer.csv')

In [None]:
summer.Athlete

In [None]:
summer.iloc[0]

In [None]:
pd.Series([1, 2, 3, 4, 5])

In [None]:
pd.Series(data = [1, 2, 3, 4, 5], index = ['a', 'b', 'c', 'd', 'e'])

In [None]:
pd.Series(data = [1, 2, 3, 4, 5], index = ['a', 'b', 'c', 'd', 'e'], name = 'Test')

#### **Part 2.**

###### **From NumPy array**

In [None]:
import numpy as np
import pandas as pd

In [None]:
np_array = np.array([1, 2, 3, 4, 5])
np_array

In [None]:
pd.Series(np_array)

###### **From Python list**

In [None]:
p_list = [1, 2, 3, 4, 5]
p_list

In [None]:
pd.Series(p_list)

###### **From Python tuple**

In [None]:
p_tuple = (1, 2, 3, 4, 5)
p_tuple

In [None]:
pd.Series(p_tuple)

###### **From Python dictionary**

In [None]:
p_dict = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
p_dict

In [None]:
pd.Series(p_dict)

### **Indexing and Slicing**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.tail()

In [None]:
age = titanic.age

In [None]:
age.head()

In [None]:
age.tail()

In [None]:
age[0]

In [None]:
age.iloc[-1]

In [None]:
age[[3, 4]]

In [None]:
age[:3]

In [None]:
age.loc[:3]

In [None]:
summer = pd.read_csv('summer.csv', index_col = 'Athlete')

In [None]:
summer

In [None]:
event = summer.Event

In [None]:
event.head()

In [None]:
event.tail()

In [None]:
# First element by position. The index holds athlete names, so an integer key
# in plain [] relies on a deprecated positional fallback; .iloc is explicit.
event.iloc[0]

In [None]:
# Last element by position. The index holds athlete names, so an integer key
# in plain [] relies on a deprecated positional fallback; .iloc is explicit.
event.iloc[-1]

In [None]:
event[:3]

In [None]:
event.iloc[:3]

In [None]:
event['PHELPS, Michael']

### **Sorting of series**

In [None]:
import pandas as pd

In [None]:
# Unordered integer keys mapped to sales figures; key 7 has no value (None)
# so the resulting Series contains one missing entry to sort around.
dict_values = dict(zip(
    (1, 2, 4, 6, 5, 3, 7),
    (3, 5, 42, 26, 55, 33, None),
))

In [None]:
sales = pd.Series(dict_values)

In [None]:
sales.sort_values()

In [None]:
sales.sort_index()

In [None]:
sales.sort_index(ascending = False)

In [None]:
sales.sort_index(ascending = True)

In [None]:
sales.sort_index(ascending = False)

In [None]:
sales.sort_index(ascending = True, inplace = True)

In [None]:
sales

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
age = titanic.age

In [None]:
age.head()

In [None]:
age.tail()

### **nlargest() and nsmallest()**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
age = titanic.age

In [None]:
age

In [None]:
age2 = age.copy()

In [None]:
age2.sort_values(ascending = False, inplace = True)

In [None]:
age2.head(10)

In [None]:
age2.nlargest(n = 10)

In [None]:
age2.nlargest(n = 10, keep = 'all')

In [None]:
age2.nsmallest(n = 10)

In [None]:
titanic.info()

### **idxmin() and idxmax()**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
fare = titanic.fare

In [None]:
fare.nlargest(n = 3).index[0]

In [None]:
fare.idxmax()

In [None]:
fare.nsmallest(n = 3).index[0]

In [None]:
fare.idxmin()

In [None]:
titanic.loc[fare.idxmax()]

In [None]:
titanic.loc[fare.idxmin()]

In [None]:
# Daily sales keyed by weekday abbreviation; Sunday has no value (None).
dic = dict(
    Mon=10.0,
    Tue=25.0,
    Wed=6.0,
    Thu=36.0,
    Fri=2.0,
    Sat=0,
    Sun=None,
)

In [None]:
sales = pd.Series(dic)

In [None]:
sales

In [None]:
sales.sort_values(ascending = True).index[0]

In [None]:
sales.idxmin()

In [None]:
sales.sort_values(ascending = False).index[0]

In [None]:
sales.idxmax()

In [None]:
sales.loc[sales.idxmax()]

### **Manipulating Pandas Series**

In [None]:
import pandas as pd

In [None]:
# Sales per weekday; "Mon" appears twice on purpose (duplicate label) and
# Sunday's value is missing.
sales = pd.Series(
    data=[10, 25, 6, 36, 2, 0, None, 5],
    index=["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun", "Mon"],
)

In [None]:
sales

In [None]:
# Item assignment instead of attribute assignment: `sales.Sun = 0` only works
# for existing, identifier-like labels and otherwise silently creates a plain
# Python attribute rather than a Series element.
sales['Sun'] = 0

In [None]:
sales

In [None]:
sales.iloc[3] = 30

In [None]:
sales

In [None]:
sales/1.1

In [None]:
sales_EUR = (sales/1.1).round(2)

In [None]:
sales_EUR

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
age = titanic.age

In [None]:
age

In [None]:
age.head()

In [None]:
# NOTE(review): `age` is `titanic.age` taken without .copy(); depending on the
# pandas version / copy-on-write setting this write may also change `titanic`
# or emit a SettingWithCopyWarning. The `.copy()` variant further down
# (`age_2`) avoids that ambiguity.
age[1] = 30

In [None]:
age.head()

In [None]:
titanic.head(n = 5)

In [None]:
age_2 = titanic.age.copy()

In [None]:
age_2[1] = 38

In [None]:
age_2.head()

### **First Steps with Pandas Index Objects**

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv('summer.csv')

In [None]:
summer.head()

In [None]:
summer.tail()

In [None]:
summer.info()

In [None]:
summer.describe()

In [None]:
summer.index

In [None]:
summer.columns

In [None]:
summer.axes

In [None]:
summer.axes[0]

In [None]:
summer.axes[1]

In [None]:
type(summer.index)

In [None]:
type(summer.columns)

In [None]:
summer_index = pd.read_csv('summer.csv', index_col = 'Athlete')

In [None]:
summer_index.head()

In [None]:
summer_index.index

In [None]:
type(summer_index.index)

In [None]:
summer.columns[:3]

In [None]:
summer_index.index[0]

In [None]:
summer_index.index[100:102]

In [None]:
summer_index.columns.to_numpy()

In [None]:
summer_index.columns.to_list()

In [None]:
summer_index.index.is_unique

In [None]:
summer_index.index.get_loc('DRIVAS, Dimitrios')

In [None]:
summer_index.index.get_loc('PHELPS, Michael')

### **Creating index objects**

In [None]:
import pandas as pd

In [None]:
list_1 = [1, 2, 3]

In [None]:
list_1

In [None]:
pd.Index(list_1)

In [None]:
list_2 = ['a', 'b', 'c']

In [None]:
pd.Index(list_2)

In [None]:
pd.Index(range(1, 5))

In [None]:
idx = pd.Index(['x', 'y', 'z'], name = 'Idx')

In [None]:
pd.Series([1, 2, 3], index = idx, name = 'Values')

### **Change row index**

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv('summer.csv', index_col = 'Athlete')

In [None]:
summer.describe()

In [None]:
summer.head()

In [None]:
summer.index

In [None]:
summer.reset_index()

In [None]:
summer

In [None]:
summer.reset_index(inplace = True)

In [None]:
summer

In [None]:
summer.set_index('Year')

In [None]:
summer.reset_index()

In [None]:
summer.index.is_unique

In [None]:
summer.set_index('Year', inplace = True)

In [None]:
summer.index.is_unique

In [None]:
summer.reset_index(inplace = True)

In [None]:
summer

In [None]:
new_index = [f'Row {i}' for i in range(1, summer.index.size + 1)]

In [None]:
new_index

In [None]:
summer.index = new_index

In [None]:
summer.head()

In [None]:
summer.index.name = "Rows"

In [None]:
summer.head()

In [None]:
# summer.reset_index(drop = True)
summer.reset_index(drop = True, inplace = True)

In [None]:
summer.head()

### **Changing column labels**

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv('titanic.csv')

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
titanic.columns

In [None]:
titanic.columns[0]

In [None]:
# Intentional demonstration: assigning a single label on an Index raises
# TypeError ("Index does not support mutable operations"); replace the whole
# `columns` attribute or use rename() instead.
titanic.columns[0] = 'Alive'

In [None]:
# Generate one generic label per existing column ('Col 1', 'Col 2', ...) so the
# assignment keeps working if the CSV's column count changes.
titanic.columns = [f'Col {i}' for i in range(1, len(titanic.columns) + 1)]

In [None]:
titanic

In [None]:
titanic.columns.name = 'Col Names'

In [None]:
titanic

In [None]:
titanic.index.name = 'Row Names'

In [None]:
titanic

### **Renaming Index and Column labels**

In [1]:
import pandas as pd

In [2]:
summer = pd.read_csv('summer.csv', index_col = 'Athlete')

In [3]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [4]:
summer.index[0] = 'HAYOS, Alfred'

TypeError: Index does not support mutable operations

In [5]:
summer.rename(mapper = {'HAJOS, Alfred': 'HAYOS, Alfred'}, axis = 'index')

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAYOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
...,...,...,...,...,...,...,...,...
"JANIKOWSKI, Damian",2012,London,Wrestling,Wrestling Freestyle,POL,Men,Wg 84 KG,Bronze
"REZAEI, Ghasem Gholamreza",2012,London,Wrestling,Wrestling Freestyle,IRI,Men,Wg 96 KG,Gold
"TOTROV, Rustam",2012,London,Wrestling,Wrestling Freestyle,RUS,Men,Wg 96 KG,Silver
"ALEKSANYAN, Artur",2012,London,Wrestling,Wrestling Freestyle,ARM,Men,Wg 96 KG,Bronze


In [7]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [8]:
summer.rename(mapper = {'HAJOS, Alfred': 'HAYOS, Alfred'}, axis = 'index', inplace = True)

In [9]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAYOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [10]:
summer.axes

[Index(['HAYOS, Alfred', 'HERSCHMANN, Otto', 'DRIVAS, Dimitrios',
        'MALOKINIS, Ioannis', 'CHASAPIS, Spiridon', 'CHOROPHAS, Efstathios',
        'HAYOS, Alfred', 'ANDREOU, Joannis', 'CHOROPHAS, Efstathios',
        'NEUMANN, Paul',
        ...
        'AHMADOV, Emin', 'KAZAKEVIC, Aleksandr', 'KHUGAEV, Alan',
        'EBRAHIM, Karam Mohamed Gaber', 'GAJIYEV, Danyal', 'JANIKOWSKI, Damian',
        'REZAEI, Ghasem Gholamreza', 'TOTROV, Rustam', 'ALEKSANYAN, Artur',
        'LIDBERG, Jimmy'],
       dtype='object', name='Athlete', length=31165),
 Index(['Year', 'City', 'Sport', 'Discipline', 'Country', 'Gender', 'Event',
        'Medal'],
       dtype='object')]

In [13]:
summer.rename(index = {'HAYOS, Alfred': 'HAJOS, Alfred'}, inplace = True)

In [14]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [15]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [19]:
summer.rename(mapper = {"Gender": 'SEX'}, inplace = True, axis = 'columns')

In [20]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,SEX,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver


In [24]:
summer.rename(columns = {'SEX': 'Gender'}, inplace = True)

In [25]:
summer.head()

Unnamed: 0_level_0,Year,City,Sport,Discipline,Country,Gender,Event,Medal
Athlete,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"HAJOS, Alfred",1896,Athens,Aquatics,Swimming,HUN,Men,100M Freestyle,Gold
"HERSCHMANN, Otto",1896,Athens,Aquatics,Swimming,AUT,Men,100M Freestyle,Silver
"DRIVAS, Dimitrios",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Bronze
"MALOKINIS, Ioannis",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Gold
"CHASAPIS, Spiridon",1896,Athens,Aquatics,Swimming,GRE,Men,100M Freestyle For Sailors,Silver
