### Importing necessary Libraries

In [1]:
# Importing pandas
import pandas as pd

### Creating DataFrame

In [None]:
# Method 1: build the frame from a dict mapping column name -> column values.
# The explicit `index` supplies the row labels 1..3 instead of the default 0-based ones.
df1 = pd.DataFrame(
    data={"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]},
    index=[1, 2, 3],
)

print(df1)

   a  b  c
1  1  4  7
2  2  5  8
3  3  6  9


In [None]:
# Method 2: build the frame from a list of rows (each inner list is one row).
# Column names and row labels are supplied separately.
df2 = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    columns=list("abc"),
    index=[1, 2, 3],
)

print(df2)

   a  b  c
1  1  2  3
2  4  5  6
3  7  8  9


In [7]:
# Method 3: rows labelled by a two-level MultiIndex.
# Each tuple is one row label; `names` labels the two index levels ("n" and "v").
index = pd.MultiIndex.from_tuples([("d", 1), ("e", 2)], names=["n", "v"])
df3 = pd.DataFrame(
    data={"a": [4, 5], "b": [7, 8], "c": [10, 11]},
    index=index,
)

print(df3)

     a  b   c
n v          
d 1  4  7  10
e 2  5  8  11


### Importing Dummy Dataset

In [17]:
# Read the sample dataset; the relative path is resolved against the current working directory.
df = pd.read_csv("Dummy_Dataset.csv")
# Preview the first five rows (rendered as the cell's output).
df.head(n=5)

Unnamed: 0,Name,Age,Height_cm,Weight_kg,Department,Score
0,Alice,25,165,55,Sales,88
1,Bob,30,180,85,Engineering,92
2,Charlie,35,175,78,HR,95
3,Diana,40,160,60,Engineering,70
4,Eve,22,155,50,Sales,85


### Reshaping Data

In [None]:
# Melting: reshape a wide-format DataFrame into a long-format one.
#   id_vars=["Name"]            -> kept as an identifier column (not melted)
#   value_vars=["Age", "Score"] -> unpivoted: each column's name goes into a new
#                                  "variable" column and its values into "value"
df_melted = df.melt(id_vars=["Name"], value_vars=["Age", "Score"])
print(df_melted)

       Name variable  value
0     Alice      Age     25
1       Bob      Age     30
2   Charlie      Age     35
3     Diana      Age     40
4       Eve      Age     22
5     Frank      Age     29
6     Grace      Age     33
7      Hank      Age     38
8       Ivy      Age     26
9      Jack      Age     31
10    Alice    Score     88
11      Bob    Score     92
12  Charlie    Score     95
13    Diana    Score     70
14      Eve    Score     85
15    Frank    Score     79
16    Grace    Score     91
17     Hank    Score     68
18      Ivy    Score     76
19     Jack    Score     84


In [None]:
# Pivot: spread the distinct Department values out into columns, filled with Score.
# No index= is passed, so the existing row labels are kept; each row therefore has a
# value only under its own department and NaN in every other column.
# To label the rows by person instead, pass the Name column as the index:
#   df.pivot(index="Name", columns="Department", values="Score")
department_scores = df[["Department", "Score"]]
df_pivot = department_scores.pivot(columns="Department", values="Score")
print(df_pivot)

Department  Engineering    HR  Marketing  Sales
0                   NaN   NaN        NaN   88.0
1                  92.0   NaN        NaN    NaN
2                   NaN  95.0        NaN    NaN
3                  70.0   NaN        NaN    NaN
4                   NaN   NaN        NaN   85.0
5                   NaN  79.0        NaN    NaN
6                   NaN   NaN       91.0    NaN
7                   NaN   NaN       68.0    NaN
8                  76.0   NaN        NaN    NaN
9                   NaN   NaN        NaN   84.0


In [28]:
# Concat: glue the first three and the last three rows of df together.
first_rows = df.head(3)
last_rows = df.tail(3)
# axis=0 stacks the pieces vertically (row-wise).
stacked_rows = pd.concat([first_rows, last_rows], axis=0)
# drop=True discards the old row labels and renumbers the result from 0.
df_concat = stacked_rows.reset_index(drop=True)
print(df_concat)


      Name  Age  Height_cm  Weight_kg   Department  Score
0    Alice   25        165         55        Sales     88
1      Bob   30        180         85  Engineering     92
2  Charlie   35        175         78           HR     95
3     Hank   38        182         90    Marketing     68
4      Ivy   26        158         52  Engineering     76
5     Jack   31        177         80        Sales     84


### Subset Observations (Rows)

In [31]:
# Filter rows: build a boolean mask, then keep only the rows where it is True.
is_over_30 = df["Age"] > 30
print("\nPeople older than 30:\n", df[is_over_30])


People older than 30:
       Name  Age  Height_cm  Weight_kg   Department  Score
2  Charlie   35        175         78           HR     95
3    Diana   40        160         60  Engineering     70
6    Grace   33        168         65    Marketing     91
7     Hank   38        182         90    Marketing     68
9     Jack   31        177         80        Sales     84


In [32]:
# Drop duplicates: rows are compared on Department only, so one row per department remains.
one_per_department = df.drop_duplicates(subset=["Department"])
print("\nDrop duplicate departments:\n", one_per_department)


Drop duplicate departments:
       Name  Age  Height_cm  Weight_kg   Department  Score
0    Alice   25        165         55        Sales     88
1      Bob   30        180         85  Engineering     92
2  Charlie   35        175         78           HR     95
6    Grace   33        168         65    Marketing     91


In [58]:
# Drop columns: returns a new frame without the listed columns; df itself is not modified.
columns_to_drop = ["Height_cm", "Weight_kg"]
print("\nDrop columns:\n", df.drop(columns=columns_to_drop))


Drop columns:
       Name  Age   Department  Score
0    Alice   25        Sales     88
1      Bob   30  Engineering     92
2  Charlie   35           HR     95
3    Diana   40  Engineering     70
4      Eve   22        Sales     85
5    Frank   29           HR     79
6    Grace   33    Marketing     91
7     Hank   38    Marketing     68
8      Ivy   26  Engineering     76
9     Jack   31        Sales     84


In [37]:
# Random sample: draw 3 rows at random from df.
# random_state pins the draw so a fresh Restart & Run All shows the same rows each time.
# The label ends with ":\n" so the colon stays on the heading line rather than
# being printed in front of the table header.
print("\nRandom Sample:\n", df.sample(3, random_state=42))


Random Sample
:       Name  Age  Height_cm  Weight_kg Department  Score
2  Charlie   35        175         78         HR     95
0    Alice   25        165         55      Sales     88
7     Hank   38        182         90  Marketing     68


In [38]:
# Top 3 by score: the three rows with the largest Score, highest first.
top_scorers = df.nlargest(n=3, columns="Score")
print("\nTop 3 by score:\n", top_scorers)


Top 3 by score:
       Name  Age  Height_cm  Weight_kg   Department  Score
2  Charlie   35        175         78           HR     95
1      Bob   30        180         85  Engineering     92
6    Grace   33        168         65    Marketing     91


In [39]:
# Bottom 3 by score: the three rows with the smallest Score, lowest first.
lowest_scorers = df.nsmallest(n=3, columns="Score")
print("\nBottom 3 by score:\n", lowest_scorers)


Bottom 3 by score:
     Name  Age  Height_cm  Weight_kg   Department  Score
7   Hank   38        182         90    Marketing     68
3  Diana   40        160         60  Engineering     70
8    Ivy   26        158         52  Engineering     76


### Subset Columns

In [40]:
# Select multiple columns: indexing with a list of names returns a DataFrame with just those columns.
selected_columns = ["Name", "Department"]
print("\nSelect Name and Department:\n", df[selected_columns])


Select Name and Department:
       Name   Department
0    Alice        Sales
1      Bob  Engineering
2  Charlie           HR
3    Diana  Engineering
4      Eve        Sales
5    Frank           HR
6    Grace    Marketing
7     Hank    Marketing
8      Ivy  Engineering
9     Jack        Sales


In [41]:
# Regex match columns: keep only the columns whose label matches the pattern "_cm".
cm_columns = df.filter(regex="_cm")
print("\nRegex match ('_cm') columns:\n", cm_columns)


Regex match ('_cm') columns:
    Height_cm
0        165
1        180
2        175
3        160
4        155
5        170
6        168
7        182
8        158
9        177


### Row + Column Subset

In [43]:
# iloc - integer-position-based selection.
# As with Python slices, the stop position is exclusive: 2:5 selects positions 2, 3 and 4,
# so the label says "2 to 4" to match the rows that are actually printed.
print("\nRows 2 to 4:\n", df.iloc[2:5])


Rows 2 to 5:
       Name  Age  Height_cm  Weight_kg   Department  Score
2  Charlie   35        175         78           HR     95
3    Diana   40        160         60  Engineering     70
4      Eve   22        155         50        Sales     85


In [44]:
# loc - label-based selection: the boolean mask picks the rows, the list picks the columns.
scored_over_90 = df["Score"] > 90
print("\nRows where Score > 90:\n", df.loc[scored_over_90, ["Name", "Score"]])


Rows where Score > 90:
       Name  Score
1      Bob     92
2  Charlie     95
6    Grace     91


In [None]:
# at - single value, label-based access: (row label, column label).
# The row label 2 is the index label of Charlie's row in this dataset.
charlie_label, score_column = 2, "Score"
print("\nScore of Charlie:\n", df.at[charlie_label, score_column])


Score of Charlie:
 95


In [None]:
# iat - single value, position-based access: (row position, column position).
# Both positions are zero-based integers, so (2, 2) is the value in the
# 3rd row and 3rd column of the DataFrame.
row_pos, col_pos = 2, 2
print("\nHeight of 3rd person:\n", df.iat[row_pos, col_pos])


Height of 3rd person:
 175


### Sorting, Renaming

In [47]:
# Sort by Score, highest first (ascending=False); returns a new, sorted frame.
by_score_desc = df.sort_values(by="Score", ascending=False)
print("\nSorted by Score Desc:\n", by_score_desc)


Sorted by Score Desc:
       Name  Age  Height_cm  Weight_kg   Department  Score
2  Charlie   35        175         78           HR     95
1      Bob   30        180         85  Engineering     92
6    Grace   33        168         65    Marketing     91
0    Alice   25        165         55        Sales     88
4      Eve   22        155         50        Sales     85
9     Jack   31        177         80        Sales     84
5    Frank   29        170         73           HR     79
8      Ivy   26        158         52  Engineering     76
3    Diana   40        160         60  Engineering     70
7     Hank   38        182         90    Marketing     68


In [48]:
# Rename column: the mapping is {old name: new name}; the result is a new frame.
column_renames = {"Height_cm": "HeightCM"}
df_renamed = df.rename(columns=column_renames)
print("\nRenamed column:\n", df_renamed.head(5))


Renamed column:
       Name  Age  HeightCM  Weight_kg   Department  Score
0    Alice   25       165         55        Sales     88
1      Bob   30       180         85  Engineering     92
2  Charlie   35       175         78           HR     95
3    Diana   40       160         60  Engineering     70
4      Eve   22       155         50        Sales     85


In [49]:
# Reset index: the old row labels are kept as a new "index" column (drop=False)
# and the rows are renumbered from 0.
print("\nReset index:\n", df.reset_index(drop=False))


Reset index:
    index     Name  Age  Height_cm  Weight_kg   Department  Score
0      0    Alice   25        165         55        Sales     88
1      1      Bob   30        180         85  Engineering     92
2      2  Charlie   35        175         78           HR     95
3      3    Diana   40        160         60  Engineering     70
4      4      Eve   22        155         50        Sales     85
5      5    Frank   29        170         73           HR     79
6      6    Grace   33        168         65    Marketing     91
7      7     Hank   38        182         90    Marketing     68
8      8      Ivy   26        158         52  Engineering     76
9      9     Jack   31        177         80        Sales     84


### Using query()

In [50]:
# query() filters rows with a string expression written in terms of the column names.
older_high_scorers = df.query("Age > 30 and Score > 80")
print("\nQuery age > 30 and Score > 80:\n", older_high_scorers)
print()
# engine="python" is passed so the .str accessor call inside the expression is evaluated by Python.
e_departments = df.query("Department.str.startswith('E')", engine="python")
print("\nQuery department starts with 'E':\n", e_departments)


Query age > 30 and Score > 80:
       Name  Age  Height_cm  Weight_kg Department  Score
2  Charlie   35        175         78         HR     95
6    Grace   33        168         65  Marketing     91
9     Jack   31        177         80      Sales     84


Query department starts with 'E':
     Name  Age  Height_cm  Weight_kg   Department  Score
1    Bob   30        180         85  Engineering     92
3  Diana   40        160         60  Engineering     70
8    Ivy   26        158         52  Engineering     76


### Method Chaining

In [55]:
# Each step returns a new frame, so the whole pipeline reads top to bottom
# without intermediate variables and without modifying df.
result = (
    df
    .query("Age > 30")                               # keep people older than 30
    .rename(columns={"Score": "TestScore"})          # relabel the score column
    .sort_values(by="TestScore", ascending=False)    # highest score first
    .head(n=5)                                       # at most the first five rows
    .reset_index(drop=False)                         # old row labels move into an "index" column
)

print(result)

   index     Name  Age  Height_cm  Weight_kg   Department  TestScore
0      2  Charlie   35        175         78           HR         95
1      6    Grace   33        168         65    Marketing         91
2      9     Jack   31        177         80        Sales         84
3      3    Diana   40        160         60  Engineering         70
4      7     Hank   38        182         90    Marketing         68


### Logic & Regex

In [53]:
# Combine two boolean masks with & (element-wise AND) and keep the rows where both hold.
in_hr = df["Department"] == "HR"
scored_above_80 = df["Score"] > 80
print("\nUsing logic: Find people in HR and Score > 80:\n", df[in_hr & scored_above_80])



Using logic: Find people in HR and Score > 80:
       Name  Age  Height_cm  Weight_kg Department  Score
2  Charlie   35        175         78         HR     95


In [68]:
# isnull() checks the entire DataFrame: the result has the same shape, with True
# wherever a value is missing (null/NaN).
print(df.isnull())

# Element-wise "is NOT missing" check on the Name column.
# The column itself must be passed: pd.notnull("Name") would test the literal
# string "Name" and always give a single True, regardless of the data.
pd.notnull(df["Name"])
# OR the equivalent method form; as the last expression of the cell, this is the
# result that gets displayed.
df["Name"].notnull()

    Name    Age  Height_cm  Weight_kg  Department  Score
0  False  False      False      False       False  False
1  False  False      False      False       False  False
2  False  False      False      False       False  False
3  False  False      False      False       False  False
4  False  False      False      False       False  False
5  False  False      False      False       False  False
6  False  False      False      False       False  False
7  False  False      False      False       False  False
8  False  False      False      False       False  False
9  False  False      False      False       False  False


0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
9    True
Name: Name, dtype: bool