# 🛍️ Meesho RTO Analysis and Prediction System (SQL + Python + ML)
This project analyzes product returns (RTOs, i.e. return-to-origin orders) and predicts them, using SQL, Python, and machine learning on real order-level Meesho data.

## 📁 Step 1: Upload and Load Data

In [None]:
# Colab-only helper module: this cell cannot run outside Google Colab.
from google.colab import files
# Ask the user to upload files and keep the call's result in `uploaded`.
# NOTE(review): per the Colab docs this opens a file picker, returns a dict of
# file name -> file bytes, and saves the files to the working directory; the
# next cell reads them back by path from /content — confirm the names match.
uploaded = files.upload()

In [None]:
import pandas as pd

# Load the two uploaded CSV exports into DataFrames.
# Edit the paths below so they match the names of the files you uploaded.
df1 = pd.read_csv(filepath_or_buffer="/content/meesho ForwardReports.csv")
df2 = pd.read_csv(filepath_or_buffer="/content/meesho Orders Aug.csv")

# Preview the first five rows of the forward report (notebook cell output).
df1.head(n=5)

## 🔄 Step 2: Merge Both Datasets on Sub Order Number

In [None]:
# Give the forward report's key column the same name the orders export uses.
# The rename is done in place on df1 and is a no-op if the column is absent.
df1.rename({'sub_order_num': 'Sub Order No'}, axis='columns', inplace=True)

# Inner join: keep only the sub-orders that appear in both files.
merged_df = df2.merge(right=df1, how='inner', on='Sub Order No')

# (rows, columns) of the merged table, shown as the cell output.
merged_df.shape

## 🧮 Step 3: Run SQL Queries on Merged Data

In [None]:
import sqlite3

# Open a throwaway in-memory SQLite database and copy the merged DataFrame
# into it as the table "orders" (the DataFrame index is not written).
conn = sqlite3.connect(":memory:")
merged_df.to_sql(name="orders", con=conn, index=False)

# Example query: number of completed RTOs per customer state, highest first.
# The result DataFrame is the cell output.
pd.read_sql_query(
    sql="""
SELECT [Customer State], COUNT(*) AS rto_count
FROM orders
WHERE [Reason for Credit Entry] = 'RTO_COMPLETE'
GROUP BY [Customer State]
ORDER BY rto_count DESC
""",
    con=conn,
)

## 📊 Step 4: Visualize RTO Patterns

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Bar chart of how many completed-RTO rows fall into each product size.
# Only rows whose credit-entry reason is exactly 'RTO_COMPLETE' are counted.
sns.countplot(
    x='Size',
    data=merged_df.loc[merged_df['Reason for Credit Entry'].eq('RTO_COMPLETE')],
)
plt.title(label="RTOs by Product Size")
# Tilt the size labels so they do not overlap.
plt.xticks(rotation=45)
plt.show()

## 🤖 Step 5: Predict RTO using ML

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Binary target: 1 when the credit-entry reason is exactly 'RTO_COMPLETE', else 0.
# A vectorized comparison replaces the per-row lambda; missing values compare
# unequal and therefore still map to 0.
merged_df['is_rto'] = merged_df['Reason for Credit Entry'].eq('RTO_COMPLETE').astype(int)

# Feature matrix: one-hot encode the selected columns, dropping the first level
# of each encoded column to avoid a redundant dummy.
# NOTE(review): assumes 'Quantity' is numeric so it passes through un-encoded — confirm.
X = pd.get_dummies(merged_df[['Size', 'Customer State', 'Quantity']], drop_first=True)
y = merged_df['is_rto']

# Hold out 30% of the rows for evaluation; the split is seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Seed the forest as well: with only the split seeded, the fitted trees (and so
# the reported metrics) would still change from one run to the next.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Per-class precision / recall / F1 on the held-out rows.
print(classification_report(y_test, y_pred))