In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
# Location of the dataset, relative to the notebook's working directory.
csv_path = Path("laptop_price.csv")

# Stop with a clear error when the file is missing. Merely printing a hint and
# continuing would leave `df` undefined (NameError on `df.head()` in a fresh
# kernel) or, worse, silently reuse a stale `df` from an earlier run.
if not csv_path.exists():
    raise FileNotFoundError(
        "CSV not found. Please place 'laptop_price.csv' in the notebook directory."
    )

df = pd.read_csv(csv_path)
print("Dataset loaded:", df.shape)

# Preview the first rows as the cell's rich output.
df.head()

## 1. Types of Data

In [None]:
# Split the column names by dtype: object-typed columns are treated as
# categorical, anything numpy considers a number as numeric.
object_frame = df.select_dtypes(include='object')
number_frame = df.select_dtypes(include=np.number)

categorical_cols = list(object_frame.columns)
numeric_cols = list(number_frame.columns)

print("Categorical Columns:", categorical_cols)
print("Numeric Columns:", numeric_cols)

## 2. Balanced vs Imbalanced Dataset

In [None]:
# Cut the prices into four equal-frequency (quartile) buckets.
price_bins = pd.qcut(
    df['Price_euros'],
    q=4,
    labels=["Low", "Medium", "High", "Very High"],
)

# Rows per bucket, ordered by bucket (Low -> Very High) rather than by count.
bin_counts = price_bins.value_counts().sort_index()

# Pie chart of the bucket shares; percentages are shown with one decimal.
plt.pie(
    bin_counts,
    labels=bin_counts.index,
    autopct='%1.1f%%',
    startangle=90,
)
plt.title("Price Distribution by Quartiles")
plt.show()

# Display the raw counts underneath the chart.
bin_counts

## 3. Continuous Variables Statistics

In [None]:
# For every numeric column, print mean, median, variance and standard
# deviation computed over the non-missing values only.
for col in numeric_cols:
    values = df[col].dropna()
    summary = {
        "Mean:": values.mean(),
        "Median:": values.median(),
        "Variance:": values.var(),
        "Std Dev:": values.std(),
    }

    print(f"--- {col} ---")
    for label, value in summary.items():
        print(label, value)
    print()

## 4. Frequency of Categorical Variables

In [None]:
# Relative frequencies for the first three categorical columns (a sample of
# the list, not a ranking). For each one only the five most common values are
# printed, since value_counts() orders by frequency.
for col in categorical_cols[:3]:
    shares = df[col].value_counts(normalize=True)
    print(f"--- {col} ---")
    print(shares.head())
    print()

# Bar chart of the ten companies that appear most often in the dataset.
top_companies = df['Company'].value_counts().head(10)
top_companies.plot(kind='bar', figsize=(8,4), title="Top 10 Companies")
plt.show()

## 5. Line Graph of Prices

In [None]:
# Sort the prices ascending and renumber them 0..n-1 so the x-axis is the
# rank of each laptop by price rather than its original row label.
sorted_prices = df['Price_euros'].sort_values().reset_index(drop=True)

sorted_prices.plot(kind='line', figsize=(8,4), title="Sorted Laptop Prices")
plt.ylabel("Price (Euros)")
plt.show()

## 6. Correlation Heatmap

In [None]:
# Pairwise correlation matrix of the numeric columns.
corr = df[numeric_cols].corr()

# One tick per column; the matrix is square, so the same positions serve
# both axes.
tick_positions = range(len(corr.columns))

fig, ax = plt.subplots(figsize=(6,5))

# Draw the matrix as a colour grid and attach the colour scale.
cax = ax.matshow(corr, cmap='coolwarm')
fig.colorbar(cax)

# Label both axes with the column names (x labels rotated to stay readable).
ax.set_xticks(tick_positions)
ax.set_xticklabels(corr.columns, rotation=90)
ax.set_yticks(tick_positions)
ax.set_yticklabels(corr.columns)
plt.show()

# Show the numeric values below the figure.
corr

## 7. Encoding Categorical Features

In [None]:
# Remove the literal 'GB' from the Ram strings and parse the remainder as a
# number; values that still cannot be parsed become NaN (errors='coerce').
ram_text = df['Ram'].str.replace('GB', '', regex=False)
df['Ram_num'] = pd.to_numeric(ram_text, errors='coerce')

# One-hot encode the listed categorical columns. drop_first=True omits the
# first level of each column so the dummies are not perfectly collinear.
encoded = pd.get_dummies(
    df,
    columns=['Company', 'TypeName', 'Cpu', 'Gpu', 'OpSys'],
    drop_first=True,
)
encoded.head()

## 8. Missing Values Handling

In [None]:
# Missing values per column before imputation.
print(df.isna().sum())

# Numeric columns: replace missing values with the column median.
# The filled Series is assigned back to the frame on purpose. Calling
# `df[c].fillna(..., inplace=True)` acts on a temporary Series: pandas 2.x
# warns about chained assignment, and with Copy-on-Write enabled `df` is left
# unchanged, so the imputation would silently do nothing.
for c in df.select_dtypes(include=[np.number]).columns:
    df[c] = df[c].fillna(df[c].median())

# Object (text) columns: replace missing values with the most frequent value.
for c in df.select_dtypes(include='object').columns:
    if df[c].isna().any():
        modes = df[c].mode()
        # mode() is empty when the whole column is missing; there is nothing
        # sensible to impute then, so the column is left as it is instead of
        # failing on `modes[0]`.
        if not modes.empty:
            df[c] = df[c].fillna(modes.iloc[0])

print("After imputation:")
print(df.isna().sum())

## 10. GitHub Repository

Link: [https://github.com/your-username/laptop-price-assignment](https://github.com/your-username/laptop-price-assignment)

### How to upload:
```bash
git init
git add Laptop_Price_Assignment_Assignment.ipynb laptop_price.csv
git commit -m "Laptop price dataset analysis assignment"
git branch -M main
git remote add origin https://github.com/your-username/laptop-price-assignment.git
git push -u origin main
```