In [50]:
# Pandas DataFrame - Practice Exercises

In [51]:
# # Make a Pandas DataFrame with two-dimensional list | Python
# Pandas DataFrame with Two-dimensional List
# There are several methods for creating a Pandas DataFrame with the two-dimensional list. In this context, we will explain some commonly used approaches.

# Using pd.DataFrame()
# Using pd.DataFrame.from_records()
# Using pd.DataFrame.from_dict()
# Using Specifying Data Types

In [52]:
import pandas as pd


In [53]:
list = [["aa", 11], ["bb",  22], ["cc", 33]]
# Using pd.DataFrame()
# Use it when: You have standard Python data structures (like dicts or lists) 
# and want full control over how they’re converted to a DataFrame.
df1 = pd.DataFrame(list)
display(df1)
coln = ["name", "id"]
df2 = pd.DataFrame(list, columns=coln)
display(df2)

Unnamed: 0,0,1
0,aa,11
1,bb,22
2,cc,33


Unnamed: 0,name,id
0,aa,11
1,bb,22
2,cc,33


In [54]:
# Using pd.DataFrame.from_records()
#This method is ideal for working with a list of records (like tuples, dicts, or objects). 
# It treats each item as a row of data.
# Use it when: Your data is structured like records (rows), 
# often from JSON, CSV, or database fetches.
records = [('Alice', 25), ('Bob', 30)]
df = pd.DataFrame.from_records(records, columns=['name', 'age'])


In [55]:
# pd.Dataframe.from_dict()
# Use it when: You have dictionary-based data 
# and want to explicitly control the conversion.
# If orient='index', the keys are treated as row indices.

data = {
    'name': ["aa","bb",'cc'],
    'age': [11,22,33],
    'locc': ['a','b','c']
}
df = pd.DataFrame.from_dict(data, orient='index')
display(df)
df1 = pd.DataFrame.from_dict(data)
df1

Unnamed: 0,0,1,2
name,aa,bb,cc
age,11,22,33
locc,a,b,c


Unnamed: 0,name,age,locc
0,aa,11,a
1,bb,22,b
2,cc,33,c


In [56]:
# Specifying Data Types
# No matter how you create your DataFrame, 
# you can specify data types using the dtype argument or by assigning types after creation:
try: 
    df = pd.DataFrame(data, dtype='float')
except Exception as e:
    print(e)
else:
    display(df)

#in this dataframe the col. include the str so it generate the error
#if all the data in from of numeric then we use this either we have to use column-specific

df_temp = pd.DataFrame(data)
# df_temp = df_temp.astype({'age': 'float'})
# df_temp

could not convert string to float: 'aa'


In [57]:
df_temp["new_col"] = ["hii", "this is" , "new col"]
df_temp["new_int"] = [10,20,30]
df_temp

Unnamed: 0,name,age,locc,new_col,new_int
0,aa,11,a,hii,10
1,bb,22,b,this is,20
2,cc,33,c,new col,30


In [58]:
df_temp = df_temp.astype({'age': 'float',
                          'new_int': 'complex',
                          'locc': 'bool'})
display(df_temp)

Unnamed: 0,name,age,locc,new_col,new_int
0,aa,11.0,True,hii,10.0+ 0.0j
1,bb,22.0,True,this is,20.0+ 0.0j
2,cc,33.0,True,new col,30.0+ 0.0j


In [59]:
df_temp.transpose()

Unnamed: 0,0,1,2
name,aa,bb,cc
age,11.0,22.0,33.0
locc,True,True,True
new_col,hii,this is,new col
new_int,(10+0j),(20+0j),(30+0j)


In [60]:
import pandas as pd
import numpy as np

data = [['Geek1', 28, 'Engineer'],
        ['Geek2', None, 'Data Scientist'],
        ['Geek3', 32, None]]

columns = ['Name', 'Age', 'Occupation']

df = pd.DataFrame(data, columns=columns)
df = df.replace({None: np.nan})
print(df)

    Name   Age      Occupation
0  Geek1  28.0        Engineer
1  Geek2   NaN  Data Scientist
2  Geek3  32.0             NaN


In [61]:
data = [['Geek1', 28, 'Engineer'],
        ['Geek2', 25, 'Data Scientist'],
        ['Geek3', '32', 'Manager']]  # Age represented as a string

col = ["name", 'Age', "Occupation"]
df = pd.DataFrame(data, columns=col)
df["Age"] = pd.to_numeric(df.Age, errors='coerce', downcast='integer')
display(df)

Unnamed: 0,name,Age,Occupation
0,Geek1,28,Engineer
1,Geek2,25,Data Scientist
2,Geek3,32,Manager


In [62]:
data = [('ANSH', 22, 9),
        ('SAHIL', 22, 6),
        ('JAYAN', 23, 8),
        ('AYUSHI', 21, 7),
        ('SPARSH', 20, 8) ]
df = pd.DataFrame.from_records(data, columns=['name', 'age', 'score'])
df

Unnamed: 0,name,age,score
0,ANSH,22,9
1,SAHIL,22,6
2,JAYAN,23,8
3,AYUSHI,21,7
4,SPARSH,20,8


In [63]:

df.columns = df.columns.str.capitalize()
df

Unnamed: 0,Name,Age,Score
0,ANSH,22,9
1,SAHIL,22,6
2,JAYAN,23,8
3,AYUSHI,21,7
4,SPARSH,20,8


In [64]:
df = df.transpose()
df

Unnamed: 0,0,1,2,3,4
Name,ANSH,SAHIL,JAYAN,AYUSHI,SPARSH
Age,22,22,23,21,20
Score,9,6,8,7,8


In [65]:
df.index = df.index.str.upper()
df

Unnamed: 0,0,1,2,3,4
NAME,ANSH,SAHIL,JAYAN,AYUSHI,SPARSH
AGE,22,22,23,21,20
SCORE,9,6,8,7,8


In [66]:
df = df.transpose()
df

Unnamed: 0,NAME,AGE,SCORE
0,ANSH,22,9
1,SAHIL,22,6
2,JAYAN,23,8
3,AYUSHI,21,7
4,SPARSH,20,8


## transrom() Vs Apply()

In [67]:
# transform() — Return shape matches original
# Works element-wise: each group returns something the same size as the input.

# Ideal when you want to add a new column to the original DataFrame.

# Output shape is always aligned with the original index.

df["score_mean"] = df.groupby('AGE')['SCORE'].transform('mean')
df

Unnamed: 0,NAME,AGE,SCORE,score_mean
0,ANSH,22,9,7.5
1,SAHIL,22,6,7.5
2,JAYAN,23,8,8.0
3,AYUSHI,21,7,7.0
4,SPARSH,20,8,8.0


In [68]:
# apply() — Flexible, but shape may change
# Can return aggregated results, Series, or even DataFrames.

# Used for complex row- or group-wise operations — stats, reshaping, custom summaries.

# Output shape can differ from input — great for custom group summaries or filtered results.

print(df.groupby('AGE')['SCORE'].apply(lambda x: x.mean()))

df['score_mean_apply']= df.groupby('AGE')['SCORE'].apply(lambda x: x.mean())
df

#you're using apply() on a GroupBy object, and it reduces the data (e.g. gives one result per group)
#  → that’s when alignment issues arise


AGE
20    8.0
21    7.0
22    7.5
23    8.0
Name: SCORE, dtype: float64


Unnamed: 0,NAME,AGE,SCORE,score_mean,score_mean_apply
0,ANSH,22,9,7.5,
1,SAHIL,22,6,7.5,
2,JAYAN,23,8,8.0,
3,AYUSHI,21,7,7.0,
4,SPARSH,20,8,8.0,


### Pivot Vs Pivot_table()

Pivoting is a powerful technique to reshape detailed (long-form) data into a summary table (wide-form) — ideal for spotting trends and comparing group statistics.

pivot() — Strict but Simple
- Best used when your data is perfectly clean, meaning there is one unique value for every combination of index and columns.

- Converts rows into columns for easier comparison.

- It is fast and straightforward, but throws an error if duplicate entries exist.

In [69]:
data = [('ANSH', 22, 9),
        ('SAHIL', 22, 6),
        ('JAYAN', 23, 8),
        ('AYUSHI', 21, 7),
        ('SPARSH', 20, 8) ]

df_pivot = pd.DataFrame(data, columns=['Team', 'Age', 'Score'])

a = df_pivot.pivot(index='Team',columns='Score', values='Age')
print(a)

Score      6     7     8     9
Team                          
ANSH     NaN   NaN   NaN  22.0
AYUSHI   NaN  21.0   NaN   NaN
JAYAN    NaN   NaN  23.0   NaN
SAHIL   22.0   NaN   NaN   NaN
SPARSH   NaN   NaN  20.0   NaN


In [73]:
df_pivot = pd.DataFrame({
    'Department': ['HR', 'HR', 'Sales', 'Sales', 'IT'],
    'Gender': ['M', 'F', 'M', 'F', 'M'],
    'Salary': [3000, 3200, 4000, 3900, 4500]
})

try:
    pivot = pd.pivot(df, index="Department", columns="Gender", values="Salary")
except Exception as e:
    print(e)
else:
    print(pivot)

#it gives like this because of in piivot there is a unique values are mandetory

'Department'


pivot_table() — Flexible and Powerful
- Works just like pivot(), but with aggregation support — great for handling duplicates or missing data.

- Lets you group data by one or more keys.

- Supports common aggregations like mean, sum, count, and even multiple functions.

- Accepts multiple columns in index, columns, and values.

In [76]:
pivot = df_pivot.pivot_table(index='Department', columns='Gender', values='Salary', aggfunc='mean')
pivot

Gender,F,M
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
HR,3200.0,3000.0
IT,,4500.0
Sales,3900.0,4000.0
