In [1]:
# Pandas DataFrames: A Concise Guide

# Creating DataFrames

# Empty DataFrame: the constructor is called with no arguments, so the
# resulting frame has no rows and no columns yet.

import pandas as pd

df_empty = pd.DataFrame()

In [2]:
# From a dictionary: every key becomes a column label and every list
# supplies that column's values, one entry per row.

data = dict(
    Name=['Alice', 'Bob', 'Chandni', 'Dawood'],
    Age=[25, 30, 18, 22],
    City=['Mumbai', 'Kolkata', 'Delhi', 'Bengaluru'],
)
df1 = pd.DataFrame(data=data)

In [3]:
# From pandas Series: one Series per column, combined into a single frame.
# Each Series is created without an explicit index, so all three share the
# default positional labels and line up row by row.

names = pd.Series(['Alice', 'Bob', 'Chandni', 'Dawood'])
ages = pd.Series([25, 30, 18, 22])
cities = pd.Series(['Mumbai', 'Kolkata', 'Delhi', 'Bengaluru'])

df2 = pd.DataFrame(dict(Name=names, Age=ages, City=cities))

In [15]:
# DataFrame attributes
# A notebook cell renders only the value of its LAST expression, so the
# earlier attributes are printed explicitly; otherwise they are evaluated
# and silently discarded.
print('dtypes:', df1.dtypes, sep='\n')  # Column data types
print('ndim:', df1.ndim)  # Number of dimensions
print('shape:', df1.shape)  # (rows, columns)
print('size:', df1.size)  # Total elements
print('index:', df1.index)  # Row index
print('columns:', df1.columns)  # Column names
df1.to_numpy()  # Data as a NumPy array; pandas recommends this over .values

array([['Alice', 25, 'Mumbai'],
       ['Bob', 30, 'Kolkata'],
       ['Chandni', 18, 'Delhi'],
       ['Dawood', 22, 'Bengaluru']], dtype=object)

Reading Data

CSV

df_csv = pd.read_csv('data.csv')

Excel

df_excel = pd.read_excel('data.xlsx')

JSON

df_json = pd.read_json('data.json')

Read CSV with Options
df_custom = pd.read_csv('data.csv', delimiter=';', header=2, index_col='ID', usecols=['ID', 'Name', 'Age'], nrows=100, skiprows=10)

The selected line:

```python
df_custom = pd.read_csv('data.csv', delimiter=';', header=2, index_col='ID', usecols=['ID', 'Name', 'Age'], nrows=100, skiprows=10)
```

 Explanation:

This command reads a CSV file (`data.csv`) into a Pandas DataFrame while applying several options:

1. **`delimiter=';'`** – Specifies that the values in the CSV file are separated by semicolons (`;`) instead of commas (`,`). Useful for European-style CSVs.
2. **`header=2`** – Uses the row at 0-based position 2 as the header row containing the column names. The position is counted *after* the `skiprows` lines have been removed, so here it is the third of the remaining rows; the rows before it are discarded.
3. **`index_col='ID'`** – Uses the column named `'ID'` as the index for the DataFrame. The column must also appear in `usecols`, otherwise pandas raises a `ValueError`.
4. **`usecols=['ID', 'Name', 'Age']`** – Reads only the specified columns, ignoring all others. `'ID'` is listed because it becomes the index, which leaves `'Name'` and `'Age'` as the data columns.
5. **`nrows=100`** – Loads at most the first 100 data rows (the rows that follow the header).
6. **`skiprows=10`** – Skips the first 10 lines of the file before the header row is located and any data is read.

This setup is useful when dealing with large datasets where you need to load only relevant portions efficiently. 🚀

In [16]:
# Indexing and Slicing

# Using .iloc[] (position-based)
# Only the last expression of a cell is rendered, so the earlier
# selections are printed to keep their results visible.
print(df1.iloc[0])  # First row
print(df1.iloc[:, 0])  # First column
print(df1.iloc[0, 0])  # First element
df1.iloc[:2, 1:]  # Slicing: first two rows, every column after the first

Unnamed: 0,Age,City
0,25,Mumbai
1,30,Kolkata


In [17]:
# Using .loc[] (label-based)
# The scalar lookup is printed because a cell renders only its last
# expression; as a bare statement its value would be discarded.
print(df1.loc[0, 'Name'])  # Specific element: row label 0, column 'Name'
df1.loc[df1['Age'] > 20, ['Name', 'City']]  # Conditional filtering

Unnamed: 0,Name,City
0,Alice,Mumbai
1,Bob,Kolkata
3,Dawood,Bengaluru


In [18]:
# DataFrame Operations

# Column operations: adds 'New Column' to df1 in place.
df1['New Column'] = df1['Age'] * 2

# Handling NaN values
# fillna() and dropna() return NEW frames and leave df1 unchanged, so the
# results are bound to names; as bare statements they were thrown away.
df1_filled = df1.fillna(0)  # Copy of df1 with NaN replaced by 0
df1_no_nan = df1.dropna()  # Copy of df1 without the rows that contain NaN

# Apply functions: label each age, treating 18 and above as 'Adult'.
df1['Age Category'] = df1['Age'].apply(lambda age: 'Adult' if age >= 18 else 'Minor')


In [19]:
# Merging DataFrames
# Inner join on 'Name': keeps only the names present in both frames.
df_merged = df1.merge(df2, how='inner', on='Name')
# Stack rows: df2's rows are appended below df1's rows.
df_concat_rows = pd.concat([df1, df2], axis='index')
# Stack columns: df2's columns are placed to the right of df1's columns.
df_concat_cols = pd.concat([df1, df2], axis='columns')

In [24]:
# Grouping and Aggregation

# Grouping and summarization
# numeric_only=True limits the sum to numeric columns. Without it the
# treatment of string columns depends on the pandas version: they are
# either dropped or concatenated.
department_sum = df1.groupby('City').sum(numeric_only=True)
# count() reports the number of non-null entries per column in each group.
department_count = df1.groupby('City').count()
# Printed because only the last expression of a cell is rendered.
print(department_sum)
df1

Unnamed: 0,Name,Age,City,New Column,Age Category
0,Alice,25,Mumbai,50,Adult
1,Bob,30,Kolkata,60,Adult
2,Chandni,18,Delhi,36,Adult
3,Dawood,22,Bengaluru,44,Adult


In [25]:
# Pivot: reshape df1 so that each city is a row, each name is a column and
# the cells hold the matching 'Age' value.
df_pivot = df1.pivot(
    index='City',
    columns='Name',
    values='Age',
)
df_pivot

Name,Alice,Bob,Chandni,Dawood
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bengaluru,,,,22.0
Delhi,,,18.0,
Kolkata,,30.0,,
Mumbai,25.0,,,


In [26]:
# Pivot tables with aggregation: same City x Name layout for both, only
# the aggregation applied to 'Age' differs.

# Summing values in a pivot table
df_pivot_sum = df1.pivot_table(
    values='Age',
    index='City',
    columns='Name',
    aggfunc='sum',
)

# Averaging values in a pivot table
df_pivot_avg = df1.pivot_table(
    values='Age',
    index='City',
    columns='Name',
    aggfunc='mean',
)