## Pandas Tutorial

### 1. Introduction to Pandas


In [109]:
import pandas as pd
import numpy as np

### 2. Creating DataFrames

In [110]:
# Build the example frame from a dictionary: each key becomes a column and
# each list holds that column's values, one entry per employee.
# The row labels are the integers 1..8 rather than the default 0-based index.
df_dict = pd.DataFrame(
    data=dict(
        Name=['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank', 'Grace', 'Helen'],
        Gender=['F', 'M', 'M', 'M', 'F', 'M', 'F', 'F'],
        Age=[24, 35, 45, 23, 34, 28, 22, 30],
        City=['New York', 'Los Angeles', 'Chicago', 'New York',
              'Chicago', 'Los Angeles', 'New York', 'Chicago'],
        Salary=[70000, 80000, 120000, 65000, 90000, 75000, 72000, 85000],
        Department=['HR', 'Finance', 'Engineering', 'HR',
                    'Engineering', 'Finance', 'HR', 'Engineering'],
    ),
    index=list(range(1, 9)),
)

In [111]:
# Display the whole frame; a bare expression on the last line of a cell is rendered as its output.
df_dict

Unnamed: 0,Name,Gender,Age,City,Salary,Department
1,Alice,F,24,New York,70000,HR
2,Bob,M,35,Los Angeles,80000,Finance
3,Charlie,M,45,Chicago,120000,Engineering
4,David,M,23,New York,65000,HR
5,Eva,F,34,Chicago,90000,Engineering
6,Frank,M,28,Los Angeles,75000,Finance
7,Grace,F,22,New York,72000,HR
8,Helen,F,30,Chicago,85000,Engineering


### 3. Basic Operations

In [112]:
# Viewing Data: show only the first three rows of the frame.
df_dict.head(n=3)

Unnamed: 0,Name,Gender,Age,City,Salary,Department
1,Alice,F,24,New York,70000,HR
2,Bob,M,35,Los Angeles,80000,Finance
3,Charlie,M,45,Chicago,120000,Engineering


In [113]:
# Show the last six rows of the frame (labels 3 through 8).
df_dict.tail(n=6)

Unnamed: 0,Name,Gender,Age,City,Salary,Department
3,Charlie,M,45,Chicago,120000,Engineering
4,David,M,23,New York,65000,HR
5,Eva,F,34,Chicago,90000,Engineering
6,Frank,M,28,Los Angeles,75000,Finance
7,Grace,F,22,New York,72000,HR
8,Helen,F,30,Chicago,85000,Engineering


In [114]:
# Print a structural summary: index range, per-column non-null counts and dtypes, and memory usage.
df_dict.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 1 to 8
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        8 non-null      object
 1   Gender      8 non-null      object
 2   Age         8 non-null      int64 
 3   City        8 non-null      object
 4   Salary      8 non-null      int64 
 5   Department  8 non-null      object
dtypes: int64(2), object(4)
memory usage: 448.0+ bytes


In [115]:
# Summary statistics for the numeric columns (Age and Salary): count, mean,
# std, min, the quartiles and max. The quartiles listed are pandas' defaults.
df_dict.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Age,Salary
count,8.0,8.0
mean,30.125,82125.0
std,7.735586,17332.360321
min,22.0,65000.0
25%,23.75,71500.0
50%,29.0,77500.0
75%,34.25,86250.0
max,45.0,120000.0


In [116]:
# Show the full frame again as a reference for the examples that follow.
df_dict

Unnamed: 0,Name,Gender,Age,City,Salary,Department
1,Alice,F,24,New York,70000,HR
2,Bob,M,35,Los Angeles,80000,Finance
3,Charlie,M,45,Chicago,120000,Engineering
4,David,M,23,New York,65000,HR
5,Eva,F,34,Chicago,90000,Engineering
6,Frank,M,28,Los Angeles,75000,Finance
7,Grace,F,22,New York,72000,HR
8,Helen,F,30,Chicago,85000,Engineering


### Accessing Data

In [117]:
# Label-based lookup: fetch the row whose index label is 1, with all columns.
df_dict.loc[1, :]

Name             Alice
Gender               F
Age                 24
City          New York
Salary           70000
Department          HR
Name: 1, dtype: object

In [119]:
# Position-based lookup: fetch the first row (position 0), with all columns.
# Because the index starts at 1, this is the same row as label 1.
df_dict.iloc[0, :]

Name             Alice
Gender               F
Age                 24
City          New York
Salary           70000
Department          HR
Name: 1, dtype: object

In [120]:
# `.at` reads a single cell by row label and column name: here the Age of the row labelled 1.
df_dict.at[1, 'Age']

24

In [121]:
# `.iat` reads a single cell by integer position: row 0, column 0 is the first row's Name.
df_dict.iat[0, 0]

'Alice'

In [122]:
# Show the full, unsorted frame again for comparison with the sorted view in the next section.
df_dict

Unnamed: 0,Name,Gender,Age,City,Salary,Department
1,Alice,F,24,New York,70000,HR
2,Bob,M,35,Los Angeles,80000,Finance
3,Charlie,M,45,Chicago,120000,Engineering
4,David,M,23,New York,65000,HR
5,Eva,F,34,Chicago,90000,Engineering
6,Frank,M,28,Los Angeles,75000,Finance
7,Grace,F,22,New York,72000,HR
8,Helen,F,30,Chicago,85000,Engineering


### Sorting Data

In [124]:
# Return a copy ordered from oldest to youngest; df_dict itself is not modified.
df_dict.sort_values('Age', ascending=False)

Unnamed: 0,Name,Gender,Age,City,Salary,Department
3,Charlie,M,45,Chicago,120000,Engineering
2,Bob,M,35,Los Angeles,80000,Finance
5,Eva,F,34,Chicago,90000,Engineering
8,Helen,F,30,Chicago,85000,Engineering
6,Frank,M,28,Los Angeles,75000,Finance
1,Alice,F,24,New York,70000,HR
4,David,M,23,New York,65000,HR
7,Grace,F,22,New York,72000,HR


### 4. Data Manipulation
### Handling Missing Data

In [125]:
# Small one-column frame with a missing value in the middle row, used by the
# missing-data examples below. The NaN makes column A a float column.
df_nan = pd.DataFrame(data=dict(A=[1, np.nan, 3]))

In [126]:
# Display the frame; the missing value in row 1 is rendered as an empty / NaN cell.
df_nan

Unnamed: 0,A
0,1.0
1,
2,3.0


In [127]:
# Boolean mask of the same shape: True wherever a value is missing.
# `isna` is the same operation as `isnull` under its other name.
df_nan.isna()

Unnamed: 0,A
0,False
1,True
2,False


In [128]:
# Return a copy with every missing value replaced by 2; df_nan is unchanged.
df_nan.fillna(value=2)

Unnamed: 0,A
0,1.0
1,2.0
2,3.0


In [129]:
# Return a copy without the rows that contain any missing value; df_nan is unchanged.
df_nan.dropna(axis=0, how='any')

Unnamed: 0,A
0,1.0
2,3.0


In [130]:
# Filtering Data: keep only the rows where Age is strictly greater than 30,
# selected with a boolean mask.
df_dict.loc[df_dict['Age'] > 30]

Unnamed: 0,Name,Gender,Age,City,Salary,Department
2,Bob,M,35,Los Angeles,80000,Finance
3,Charlie,M,45,Chicago,120000,Engineering
5,Eva,F,34,Chicago,90000,Engineering


In [131]:
# Show the full frame again: the filter above returned a new frame and left df_dict unchanged.
df_dict

Unnamed: 0,Name,Gender,Age,City,Salary,Department
1,Alice,F,24,New York,70000,HR
2,Bob,M,35,Los Angeles,80000,Finance
3,Charlie,M,45,Chicago,120000,Engineering
4,David,M,23,New York,65000,HR
5,Eva,F,34,Chicago,90000,Engineering
6,Frank,M,28,Los Angeles,75000,Finance
7,Grace,F,22,New York,72000,HR
8,Helen,F,30,Chicago,85000,Engineering


In [132]:
# Grouping Data: group rows by Age and sum every other column within each group.
# NOTE: every Age in this frame is unique, so each group holds exactly one row
# and the "sum" simply echoes that row (string columns are concatenated, which
# for a single row is the original string).
df_dict.groupby(by='Age').sum()

Unnamed: 0_level_0,Name,Gender,City,Salary,Department
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
22,Grace,F,New York,72000,HR
23,David,M,New York,65000,HR
24,Alice,F,New York,70000,HR
28,Frank,M,Los Angeles,75000,Finance
30,Helen,F,Chicago,85000,Engineering
34,Eva,F,Chicago,90000,Engineering
35,Bob,M,Los Angeles,80000,Finance
45,Charlie,M,Chicago,120000,Engineering


In [None]:
# Per-department summary using named aggregation: each keyword is an output
# column, given as (source column, aggregation).
df_dict.groupby('Department').agg(
    Name=('Name', ', '.join),   # all names in the department, comma-separated
    Age=('Age', 'mean'),        # average age
    Salary=('Salary', 'sum'),   # total salary
)

In [None]:
# Merging DataFrames: join the frame to itself on Age (inner join by default).
# Columns present on both sides other than the key get _x / _y suffixes.
df_dict.merge(df_dict, on='Age')

In [None]:
# Concatenating DataFrames: stack two copies of the frame vertically.
# The original index labels are kept, so labels 1..8 each appear twice in the result.
pd.concat([df_dict] * 2)

### 5. Visualization

In [None]:
# Bar chart with one bar per employee (x = index label, height = Salary).
# pandas returns the matplotlib Axes it drew on, so the title and axis labels
# are set on that object directly. The previous `plt.show()` call was removed:
# `plt` is never imported in this notebook, so it raised NameError on a fresh
# kernel. In a notebook the figure is rendered when the cell finishes; the
# trailing semicolon only suppresses the text repr of the last call.
ax = df_dict['Salary'].plot(kind='bar', title='Salary by employee')
ax.set_xlabel('Employee (index label)')
ax.set_ylabel('Salary');

In [None]:
# 6. Saving Data
# Write the frame to data.csv in the current working directory.
# The index labels are written as the first, unnamed column.
df_dict.to_csv(path_or_buf='data.csv')

In [None]:
# Write the frame to data.xlsx in the current working directory.
# NOTE(review): pandas needs an Excel writer engine (e.g. openpyxl) installed for .xlsx output — confirm the environment has one.
df_dict.to_excel(excel_writer='data.xlsx')