# Creating DataFrames

Specify values for each column

In [7]:
import pandas as pd

# Column-oriented input: every key is a column label and its list holds
# that column's values, first row to last.
data = {
    'Day': ['Mon', 'Tue', 'Wed', 'Thu', 'Fri'],
    'Vehicle_Count': [100, 120, 130, 140, 150],
    'Accidents': [1, 0, 2, 1, 0],
}

# Build the frame, then swap the default 0..4 row labels for letters.
df = pd.DataFrame(data)
df.index = ['A', 'B', 'C', 'D', 'E']

print(df)

   Day  Vehicle_Count  Accidents
A  Mon            100          1
B  Tue            120          0
C  Wed            130          2
D  Thu            140          1
E  Fri            150          0


Specify values for each row

In [2]:
import pandas as pd

# Row-oriented input: each inner list is one record, in the order
# (day, vehicle count, accidents).
data = [
    ['Mon', 100, 1],
    ['Tue', 120, 0],
    ['Wed', 130, 2],
    ['Thu', 140, 1],
    ['Fri', 150, 0],
]

# The row lists carry no names, so the column labels are passed separately.
# The explicit index labels the rows 1..5 instead of the default 0..4.
df = pd.DataFrame(
    data=data,
    index=[1, 2, 3, 4, 5],
    columns=['Day', 'Vehicle_Count', 'Accidents'],
)

print(df)

   Day  Vehicle_Count  Accidents
1  Mon            100          1
2  Tue            120          0
3  Wed            130          2
4  Thu            140          1
5  Fri            150          0


Create DataFrame with a MultiIndex/Hierarchical index

In [5]:
import pandas as pd

# The column data is given exactly as for a flat frame; only the index differs.
# Each tuple passed to from_tuples is one row's (n, v) label pair, and
# `names` labels the two index levels.
df = pd.DataFrame(
    data={"a": [4, 5, 6], "b": [7, 8, 9], "c": [10, 11, 12]},
    index=pd.MultiIndex.from_tuples(
        [('d', 1), ('d', 2), ('e', 2)],
        names=['n', 'v'],
    ),
)

print(df)

     a  b   c
n v          
d 1  4  7  10
  2  5  8  11
e 2  6  9  12


Accessing Data in MultiIndex

In [8]:
# A full (n, v) tuple addresses exactly one row, which comes back as a Series.
# First the row with n='d', v=1, then the row with n='e', v=2.
print(df.loc[('d', 1)], df.loc[('e', 2)], sep='\n')

a     4
b     7
c    10
Name: (d, 1), dtype: int64
a     6
b     9
c    12
Name: (e, 2), dtype: int64


# Reshaping Data

#### 1. `pd.melt(df)` – Gather columns into rows  
Purpose: Converts wide data into long data.

In [9]:
import pandas as pd

# Wide layout: one row per student, one column per subject.
df = pd.DataFrame(
    data=[
        ['Alice', 90, 85],
        ['Bob', 80, 95],
    ],
    columns=['Name', 'Math', 'Science'],
)
df

Unnamed: 0,Name,Math,Science
0,Alice,90,85
1,Bob,80,95


In [11]:
# Wide -> long. Every column not listed in id_vars is unpivoted into
# (Subject, Score) pairs.
melted = pd.melt(
    frame=df,
    # Identifier column(s) repeated on every output row
    id_vars=['Name'],
    # Name of the new column holding the old column headers (Math, Science)
    var_name='Subject',
    # Name of the new column holding the values that sat under those headers
    value_name='Score',
)

melted

Unnamed: 0,Name,Subject,Score
0,Alice,Math,90
1,Bob,Math,80
2,Alice,Science,85
3,Bob,Science,95


#### 2. `df.pivot(columns='var', values='val')` – Spread rows into columns

In [12]:
import pandas as pd

# Long layout: one row per (salesperson, month) pair with its sales figure.
data = {
    'Salesperson': ['Alice', 'Alice', 'Bob', 'Bob', 'Charlie', 'Charlie'],
    'Month': ['Jan', 'Feb', 'Jan', 'Feb', 'Jan', 'Feb'],
    'Sales': [250, 300, 200, 220, 150, 180],
}

# Dict keys become the column labels (the default 'columns' orientation).
df = pd.DataFrame.from_dict(data)
print(df)


  Salesperson Month  Sales
0       Alice   Jan    250
1       Alice   Feb    300
2         Bob   Jan    200
3         Bob   Feb    220
4     Charlie   Jan    150
5     Charlie   Feb    180


In [15]:
# Long -> wide: one row per salesperson, one column per month,
# with the Sales figures filling the cells.
df_pivot = df.pivot(
    columns='Month',
    index='Salesperson',
    values='Sales',
)
print(df_pivot)

Month        Feb  Jan
Salesperson          
Alice        300  250
Bob          220  200
Charlie      180  150


#### 3. `pd.concat([df1, df2])` – Append rows

In [16]:
import pandas as pd

# Two days of orders that share the same three columns.
# Day 1 orders
df1 = pd.DataFrame(
    data={
        'OrderID': [1, 2],
        'Coffee': ['Latte', 'Espresso'],
        'Pastry': ['Croissant', 'Muffin'],
    }
)

# Day 2 orders
df2 = pd.DataFrame(
    data={
        'OrderID': [3, 4],
        'Coffee': ['Cappuccino', 'Americano'],
        'Pastry': ['Donut', 'Scone'],
    }
)

# Show day 1 first, then day 2.
print(df1)
print(df2)

   OrderID    Coffee     Pastry
0        1     Latte  Croissant
1        2  Espresso     Muffin
   OrderID      Coffee Pastry
0        3  Cappuccino  Donut
1        4   Americano  Scone


Concatenate the two DataFrames by appending the rows of `df2` below the rows of `df1`.

In [17]:
# Stack df2's rows underneath df1's (axis=0 is the row direction).
# Each frame's own row labels are carried over unchanged;
# pass ignore_index=True to renumber them instead.
df_combined = pd.concat(objs=[df1, df2], axis=0)
print(df_combined)

   OrderID      Coffee     Pastry
0        1       Latte  Croissant
1        2    Espresso     Muffin
0        3  Cappuccino      Donut
1        4   Americano      Scone


####  4. `pd.concat([df1, df2], axis=1)` – Append columns  
Combines DataFrames horizontally (column-wise).

In [27]:
import pandas as pd

# Left-hand frame: employee names and departments
df1 = pd.DataFrame(
    data={
        'Name': ['Alice', 'Bob', 'Charlie'],
        'Department': ['HR', 'Finance', 'IT'],
    }
)

# Right-hand frame: salaries and experience for the same three rows
df2 = pd.DataFrame(
    data={
        'Salary': [70000, 80000, 65000],
        'Experience': [5, 8, 3],
    }
)

# axis=1 places the frames side by side, aligning rows on their index labels.
result = pd.concat([df1, df2], axis=1)

# Print both inputs and the combined frame, separated by blank lines.
print(df1, df2, result, sep='\n\n')


      Name Department
0    Alice         HR
1      Bob    Finance
2  Charlie         IT

   Salary  Experience
0   70000           5
1   80000           8
2   65000           3

      Name Department  Salary  Experience
0    Alice         HR   70000           5
1      Bob    Finance   80000           8
2  Charlie         IT   65000           3
