<a href="https://colab.research.google.com/github/Yashasvi-30/AmazonClone/blob/main/Pandas-4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load data in various formats
# Replace 'data.csv' and 'data.xlsx' with your dataset paths.
#
# Each file is attempted on its own so that a missing CSV does not prevent
# the Excel file from being read (and vice versa). Both names are always
# bound: they stay None when the corresponding file could not be found.
csv_data = None
excel_data = None
any_file_missing = False

try:
    csv_data = pd.read_csv('data.csv')  # Load CSV file
except FileNotFoundError:
    any_file_missing = True
else:
    print("\nCSV Data Loaded Successfully:")
    print(csv_data.head())

try:
    excel_data = pd.read_excel('data.xlsx')  # Load Excel file
except FileNotFoundError:
    any_file_missing = True
else:
    print("\nExcel Data Loaded Successfully:")
    print(excel_data.head())

# Print the hint once, after both attempts, if at least one path was wrong.
if any_file_missing:
    print("\nReplace 'data.csv' and 'data.xlsx' with actual file paths!")

# Small in-memory DataFrame used by every later step.
# The last 'Score' entry is deliberately missing so the missing-value
# examples have something to work on.
sample_columns = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[24, 27, 22, 32, 29],
    Score=[85.5, 78.2, 92.3, 88.0, None],
    Category=['A', 'B', 'A', 'B', 'C'],
)
data = pd.DataFrame(sample_columns)
print("\nSimulated DataFrame:\n", data)

# Exploring DataFrame attributes: dimensions, column labels, dtypes,
# the first/last rows, a structural summary and descriptive statistics.
print("\nDataFrame Attributes:")
print("Shape:", data.shape)
print("Columns:", data.columns)
print("Data Types:\n", data.dtypes)
print("\nFirst Few Rows:\n", data.head())
print("\nLast Few Rows:\n", data.tail())
print("\nData Info:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit an extra "None" line.
data.info()
print("\nData Description:")
print(data.describe())

# Column and row selection: a single column, a list of columns,
# a positional row slice, and a boolean-mask filter.
print("\nSelecting Columns and Rows:")

age_column = data['Age']
print("Selecting 'Age' column:\n", age_column)

name_and_score = data[['Name', 'Score']]
print("Selecting multiple columns:\n", name_and_score)

# Positional slice: rows at positions 1, 2 and 3 (end is exclusive).
middle_rows = data.iloc[1:4]
print("Selecting rows by index (iloc):\n", middle_rows)

# Boolean mask: keep only rows whose Age is greater than 25.
older_than_25 = data['Age'] > 25
print("Selecting rows by condition (loc):\n", data.loc[older_than_25])


In [None]:
import numpy as np

# Locate missing values: a per-cell boolean mask, then per-column counts
# obtained by summing that mask (True counts as 1).
null_mask = data.isnull()
print("\nIdentifying Missing Values:")
print("Is Null:\n", null_mask)
print("Missing Value Count:\n", null_mask.sum())

# Handling missing values: three alternatives, each producing a new
# DataFrame and leaving `data` untouched.
print("\nHandling Missing Values:")

# Fill with the column mean. The filled Series is assigned back to the
# column instead of calling fillna(..., inplace=True) on the selected
# column: that chained in-place form raises a FutureWarning on pandas 2.x
# and does not modify the DataFrame at all under Copy-on-Write.
data_filled = data.copy()
data_filled['Score'] = data_filled['Score'].fillna(data['Score'].mean())
print("Filled Missing Values with Mean:\n", data_filled)

# Drop rows with missing values. dropna() already returns a new object,
# so no explicit copy is needed first.
data_dropped = data.dropna()
print("Dropped Rows with Missing Values:\n", data_dropped)

# Interpolate missing scores from the neighbouring values.
data_interpolated = data.copy()
data_interpolated['Score'] = data_interpolated['Score'].interpolate()
print("Interpolated Missing Values:\n", data_interpolated)

# Scaling techniques, both applied to the 'Age' column of the
# mean-filled frame; every other column is carried over unchanged.
print("\nScaling Techniques:")

age_col = data_filled['Age']

# Min-Max Scaling: (x - min) / (max - min)
age_min = age_col.min()
age_max = age_col.max()
min_max_scaled = data_filled.copy()
min_max_scaled['Age'] = (age_col - age_min) / (age_max - age_min)
print("Min-Max Scaled Data:\n", min_max_scaled)

# Z-Score Standardization: (x - mean) / std
age_mean = age_col.mean()
age_std = age_col.std()
z_score_scaled = data_filled.copy()
z_score_scaled['Age'] = (age_col - age_mean) / age_std
print("Z-Score Scaled Data:\n", z_score_scaled)

# One-hot encode the 'Category' column. drop_first=True omits the first
# category level so the remaining indicator columns are not redundant.
print("\nCreating Dummy Variables:")
categorical_columns = ['Category']
data_with_dummies = pd.get_dummies(
    data,
    columns=categorical_columns,
    drop_first=True,
)
print("Data with Dummy Variables:\n", data_with_dummies)
