<a href="https://colab.research.google.com/github/aaditya9803/ml/blob/main/Clothing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing the Dataset from Google Drive

In [None]:
# Authenticate with Google from Colab and download the dataset from Google Drive
# as 'clothing.csv' (the file name that pd.read_csv loads further down).
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
# Get the Drive file by its ID and save its content locally as 'clothing.csv'
downloaded = drive.CreateFile({'id':'120IczLR6Dua0FZeFRHAd5PNBAJsWUIjw'})
downloaded.GetContentFile('clothing.csv')

## Importing Python Libraries

In [302]:
import ast

import lightgbm as lgb
import numpy as np
import pandas as pd
import tensorflow as tf
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVR
from tensorflow import keras

## Preparing the Dataset

In [303]:
# Load the semicolon-separated dataset; the first row holds the column names.
train_data = pd.read_csv('clothing.csv', sep=';', header=0)
# read_csv already returns a DataFrame, so no re-wrapping is needed. Work on an
# explicit copy so the preprocessing below can never alter the raw frame that
# stays available in `train_data`.
df = train_data.copy()

In [304]:
# Display the raw frame as loaded from the CSV.
df

Unnamed: 0,Height(Centimeter),Weight(Kilograms),Gender,BMI,Skin Color,Clothes Color,Pants Color
0,167.089607,51.252494,Female,Underweight,"(226, 165, 137)","(19, 165, 16)","(24, 212, 123)"
1,181.648633,61.909547,Male,Ideal,"(204, 150, 103)","(55, 238, 252)","(101, 131, 136)"
2,176.272800,69.411778,Male,Ideal,"(226, 165, 137)","(58, 67, 207)","(27, 41, 168)"
3,173.270164,64.562199,Male,Ideal,"(119, 78, 58)","(29, 140, 46)","(71, 123, 25)"
4,172.181037,65.452010,Male,Ideal,"(226, 165, 137)","(200, 235, 99)","(122, 118, 117)"
...,...,...,...,...,...,...,...
24995,176.535461,53.538008,Female,Underweight,"(226, 165, 137)","(109, 113, 120)","(251, 24, 50)"
24996,163.952580,54.518674,Female,Ideal,"(119, 78, 58)","(71, 217, 148)","(82, 156, 212)"
24997,164.334317,53.644285,Female,Ideal,"(246, 215, 197)","(132, 65, 237)","(109, 93, 216)"
24998,171.524117,59.995797,Male,Ideal,"(246, 215, 197)","(134, 229, 242)","(126, 23, 219)"


In [305]:
# List the distinct text labels found in each categorical column.
for categorical_column in ('Gender', 'BMI'):
    print(df[categorical_column].unique())

['Female' 'Male']
['Underweight' 'Ideal' 'Overweight']


In [306]:
# Replace the text labels with integer codes.
# Series.map turns any value that is not a key of the mapping into NaN, so this
# cell has to run exactly once on the raw text labels.
category_codes = {
    'Gender': {'Male': 0, 'Female': 1},
    'BMI': {'Underweight': 0, 'Ideal': 1, 'Overweight': 2},
}
for column_name, codes in category_codes.items():
    df[column_name] = df[column_name].map(codes)

In [307]:
# Inspect the skin colour column, which holds RGB triples written as text.
df['Skin Color']

Unnamed: 0,Skin Color
0,"(226, 165, 137)"
1,"(204, 150, 103)"
2,"(226, 165, 137)"
3,"(119, 78, 58)"
4,"(226, 165, 137)"
...,...
24995,"(226, 165, 137)"
24996,"(119, 78, 58)"
24997,"(246, 215, 197)"
24998,"(246, 215, 197)"


## Mapping RGB values to Popular Colors

In [308]:
# Map RGB values to closest skin tones

skin_tones = {
    "Very Fair Skin": (255, 219, 199),  # Lightest skin tones
    "Fair Skin": (242, 204, 187),       # Light, pinkish skin
    "Light Olive Skin": (224, 172, 105),# Light olive/peach tones
    "Medium Skin": (198, 134, 92),      # Tan, medium tones
    "Golden Skin": (210, 160, 120),     # Warm golden tones
    "Deep Tan Skin": (160, 100, 60),    # Deep olive/tan tones
    "Light Brown Skin": (140, 85, 50),  # Light brown shades
    "Medium Brown Skin": (115, 70, 45), # Medium brown tones
    "Dark Brown Skin": (90, 50, 30),    # Dark brown tones
    "Deep Dark Skin": (60, 40, 20),     # Deepest skin tones
}


def rgb_to_skin_tone(rgb, palette=None):
    """Return the name of the reference skin tone nearest to ``rgb``.

    Nearness is the Euclidean distance in RGB space. When several tones are
    equally close, the one listed first in the palette wins.

    Args:
        rgb: Indexable with at least three numeric channels (red, green, blue).
            NumPy integer scalars are accepted.
        palette: Optional ``{name: (r, g, b)}`` mapping to search instead of the
            module-level ``skin_tones``.

    Returns:
        The name of the closest tone, or ``None`` if the palette is empty.
    """
    if palette is None:
        palette = skin_tones

    # Coerce the channels to float: fixed-width unsigned values (e.g. numpy
    # uint8) would otherwise wrap around when a larger reference channel is
    # subtracted, yielding a bogus distance.
    red, green, blue = float(rgb[0]), float(rgb[1]), float(rgb[2])

    closest_skin_tone = None
    min_distance = float('inf')
    for tone_name, tone_rgb in palette.items():
        # Squared distance is enough: the square root is monotonic, so it
        # cannot change which tone is the closest.
        distance = (
            (red - tone_rgb[0]) ** 2 +
            (green - tone_rgb[1]) ** 2 +
            (blue - tone_rgb[2]) ** 2
        )
        if distance < min_distance:
            min_distance = distance
            closest_skin_tone = tone_name

    return closest_skin_tone


# Parse the "(r, g, b)" strings into tuples. ast.literal_eval is used instead of
# eval because it only accepts Python literals, so a corrupted or malicious CSV
# cell cannot execute arbitrary code. Non-string values are passed through.
df['Skin Color'] = df['Skin Color'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

# Collapse every RGB tuple to the name of its nearest reference skin tone.
df['Skin Color'] = df['Skin Color'].apply(rgb_to_skin_tone)

print(df['Skin Color'].unique())
print(df['Skin Color'].value_counts())

['Golden Skin' 'Medium Skin' 'Medium Brown Skin' 'Very Fair Skin'
 'Fair Skin']
Skin Color
Very Fair Skin       11280
Medium Brown Skin     5461
Fair Skin             2849
Medium Skin           2714
Golden Skin           2696
Name: count, dtype: int64


In [309]:
# Mapping to the nearest clothes/pants colour
popular_colors = {
    "Red": (255, 0, 0),
    "Green": (0, 255, 0),
    "Blue": (0, 0, 255),
    "Yellow": (255, 255, 0),
    "Cyan": (0, 255, 255),
    "Magenta": (255, 0, 255),
    "Black": (0, 0, 0),
    "White": (255, 255, 255),
    "Gray": (128, 128, 128),
    "Orange": (255, 165, 0),
    "Pink": (255, 192, 203),
    "Purple": (128, 0, 128),
    "Brown": (165, 42, 42)
}

def rgb_to_popular_color(rgb, palette=None):
    """Return the name of the popular colour nearest to ``rgb``.

    Nearness is the Euclidean distance in RGB space. When several colours are
    equally close, the one listed first in the palette wins.

    Args:
        rgb: Indexable with at least three numeric channels (red, green, blue).
            NumPy integer scalars are accepted.
        palette: Optional ``{name: (r, g, b)}`` mapping to search instead of the
            module-level ``popular_colors``.

    Returns:
        The name of the closest colour, or ``None`` if the palette is empty.
    """
    if palette is None:
        palette = popular_colors

    # Coerce the channels to float: fixed-width unsigned values (e.g. numpy
    # uint8) would otherwise wrap around when a larger reference channel is
    # subtracted, yielding a bogus distance.
    red, green, blue = float(rgb[0]), float(rgb[1]), float(rgb[2])

    closest_color = None
    min_distance = float('inf')
    for color_name, color_rgb in palette.items():
        # Squared distance is enough: the square root is monotonic, so it
        # cannot change which colour is the closest.
        distance = (
            (red - color_rgb[0]) ** 2 +
            (green - color_rgb[1]) ** 2 +
            (blue - color_rgb[2]) ** 2
        )
        if distance < min_distance:
            min_distance = distance
            closest_color = color_name

    return closest_color



# for color in input_colors:
#     print(f"RGB {color} -> Closest Popular Color: {rgb_to_popular_color(color)}")

# Both garment columns get the same treatment: parse the "(r, g, b)" string into
# a tuple, then replace it with the name of the nearest popular colour.
# ast.literal_eval is used instead of eval because it only accepts Python
# literals, so a corrupted or malicious CSV cell cannot execute arbitrary code.
for color_column in ('Clothes Color', 'Pants Color'):
    df[color_column] = df[color_column].apply(
        lambda value: ast.literal_eval(value) if isinstance(value, str) else value
    )  # Convert string to tuple
    df[color_column] = df[color_column].apply(rgb_to_popular_color)

print(df['Clothes Color'].unique())
print(df['Clothes Color'].value_counts())
print(df['Pants Color'].unique())
print(df['Pants Color'].value_counts())

['Green' 'Cyan' 'Blue' 'Yellow' 'Purple' 'Black' 'Pink' 'Gray' 'White'
 'Brown' 'Magenta' 'Orange' 'Red']
Clothes Color
Gray       8123
Brown      2707
Pink       2589
Purple     2345
Green      1607
Cyan       1569
Orange     1266
Blue       1232
Magenta    1054
Black      1052
Yellow      808
White       332
Red         316
Name: count, dtype: int64
['Green' 'Gray' 'Blue' 'White' 'Brown' 'Cyan' 'Purple' 'Black' 'Red'
 'Orange' 'Yellow' 'Pink' 'Magenta']
Pants Color
Gray       8195
Brown      2634
Pink       2573
Purple     2393
Cyan       1611
Green      1569
Orange     1260
Blue       1219
Black      1078
Magenta    1021
Yellow      816
White       316
Red         315
Name: count, dtype: int64


## Converting RGB value to a single value

In [310]:
# import ast  # For safely evaluating strings to tuples

# def parse_rgb(value):
#     """
#     Ensure the input is a tuple of integers.
#     If it's a string, convert it to a tuple.
#     """
#     if isinstance(value, str):
#         try:
#             # Convert string to tuple
#             return tuple(ast.literal_eval(value))
#         except (ValueError, SyntaxError):
#             # Return original value if parsing fails
#             return value
#     return value

# def rgb_to_single(rgb):
#     """
#     Convert an RGB tuple to a single integer.
#     """
#     if isinstance(rgb, tuple) and len(rgb) == 3 and all(isinstance(x, int) for x in rgb):
#         r, g, b = rgb
#         return (r << 16) + (g << 8) + b
#     else:
#         return rgb  # Return unchanged if input is invalid

# def single_to_rgb(value):
#     """
#     Convert a single integer back to an RGB tuple.
#     """
#     if isinstance(value, int):
#         r = (value >> 16) & 255
#         g = (value >> 8) & 255
#         b = value & 255
#         return (r, g, b)
#     return value  # Return unchanged if input is invalid

# # Parse the RGB strings to tuples first
# df['Skin Color'] = df['Skin Color'].apply(parse_rgb)
# df['Clothes Color'] = df['Clothes Color'].apply(parse_rgb)
# df['Pants Color'] = df['Pants Color'].apply(parse_rgb)

# # Convert RGB tuples to single integers
# df['Skin Color'] = df['Skin Color'].apply(rgb_to_single)
# df['Clothes Color'] = df['Clothes Color'].apply(rgb_to_single)
# df['Pants Color'] = df['Pants Color'].apply(rgb_to_single)


In [311]:
# Display the frame after the Gender/BMI codes and the colour-name mapping.
df

Unnamed: 0,Height(Centimeter),Weight(Kilograms),Gender,BMI,Skin Color,Clothes Color,Pants Color
0,167.089607,51.252494,1,0,Golden Skin,Green,Green
1,181.648633,61.909547,0,1,Medium Skin,Cyan,Gray
2,176.272800,69.411778,0,1,Golden Skin,Blue,Blue
3,173.270164,64.562199,0,1,Medium Brown Skin,Green,Gray
4,172.181037,65.452010,0,1,Golden Skin,Yellow,Gray
...,...,...,...,...,...,...,...
24995,176.535461,53.538008,1,0,Golden Skin,Gray,Red
24996,163.952580,54.518674,1,1,Medium Brown Skin,Gray,Gray
24997,164.334317,53.644285,1,1,Very Fair Skin,Gray,Gray
24998,171.524117,59.995797,0,1,Very Fair Skin,White,Purple


In [312]:
##For Linear Regression
# df[['Skin_Red','Skin_Green','Skin_Blue']] = df['Skin Color'].str.strip('()').str.split(', ', expand=True)
# df['Skin_Red'] = df['Skin_Red'].astype(float)
# df['Skin_Green'] = df['Skin_Green'].astype(float)
# df['Skin_Blue'] = df['Skin_Blue'].astype(float)
# df[['Clothes_Red','Clothes_Green','Clothes_Blue']] = df['Clothes Color'].str.strip('()').str.split(', ', expand=True)
# df['Clothes_Red'] = df['Clothes_Red'].astype(float)
# df['Clothes_Green'] = df['Clothes_Green'].astype(float)
# df['Clothes_Blue'] = df['Clothes_Blue'].astype(float)
# df[['Pants_Red','Pants_Green','Pants_Blue']] = df['Pants Color'].str.strip('()').str.split(', ', expand=True)
# df['Pants_Red'] = df['Pants_Red'].astype(float)
# df['Pants_Green'] = df['Pants_Green'].astype(float)
# df['Pants_Blue'] = df['Pants_Blue'].astype(float)
# df.drop('Skin Color', axis=1, inplace=True)
# df.drop('Clothes Color', axis=1, inplace=True)
# df.drop('Pants Color', axis=1, inplace=True)
# df

##For Linear Regression
# X = df.drop(columns=df.columns[7:12]) #Features
# y_clothes = df[[df.columns[7],df.columns[8],df.columns[9]]] #Target
# y_pants = df[[df.columns[10],df.columns[11],df.columns[12]]] #Target
# X = df.iloc[:, 0:7]
# y_clothes = df.iloc[:, 7:10]
# y_pants = df.iloc[:, 10:13]

In [316]:
# Labelling: turn the skin-tone / colour names into integer class ids.
# A separate fitted encoder is kept per column so that predicted ids can later
# be turned back into names with label_encoders[column].inverse_transform(...).
# Re-fitting one shared encoder would discard the mapping of the earlier columns.
label_encoders = {}
for column_name in ('Skin Color', 'Clothes Color', 'Pants Color'):
    df[column_name] = df[column_name].astype(str)
    label_encoders[column_name] = LabelEncoder()
    df[column_name] = label_encoders[column_name].fit_transform(df[column_name])

# Backward compatibility: `label_encoder` still refers to the encoder that was
# fitted last, i.e. the one for 'Pants Color'.
label_encoder = label_encoders['Pants Color']
df

Unnamed: 0,Height(Centimeter),Weight(Kilograms),Gender,BMI,Skin Color,Clothes Color,Pants Color
0,167.089607,51.252494,1,0,1,5,5
1,181.648633,61.909547,0,1,3,3,4
2,176.272800,69.411778,0,1,1,1,1
3,173.270164,64.562199,0,1,2,5,4
4,172.181037,65.452010,0,1,1,12,4
...,...,...,...,...,...,...,...
24995,176.535461,53.538008,1,0,1,4,10
24996,163.952580,54.518674,1,1,2,4,4
24997,164.334317,53.644285,1,1,4,4,4
24998,171.524117,59.995797,0,1,4,11,9


In [317]:
# Split the frame into the model inputs and the two colour targets.
# Columns are selected by name rather than by position, so a reordered or
# extended CSV cannot silently change which columns are treated as targets.
target_columns = ['Clothes Color', 'Pants Color']
X = df.drop(columns=target_columns)  # Features
y_clothes = df['Clothes Color']  # Target
y_pants = df['Pants Color']  # Target
y = df[target_columns]  # Both targets side by side

In [318]:
# Display the two-column target frame (encoded clothes and pants colours).
y

Unnamed: 0,Clothes Color,Pants Color
0,5,5
1,3,4
2,1,1
3,5,4
4,12,4
...,...,...
24995,4,10
24996,4,4
24997,4,4
24998,11,9


## Multi-Output Regressor

In [319]:
# Predict both encoded colour targets at once with a random-forest regressor
# wrapped in MultiOutputRegressor; 20% of the rows are held out for testing.
# NOTE(review): the targets are LabelEncoder class ids, so the regression
# metrics below treat the colour ids as ordered numbers - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = MultiOutputRegressor(RandomForestRegressor(random_state=42))

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# mean_squared_error(..., squared=False) is deprecated and removed in recent
# scikit-learn releases. Compute the same quantity directly: the RMSE of each
# target, averaged uniformly over the targets.
rmse = np.sqrt(mean_squared_error(y_test, y_pred, multioutput='raw_values')).mean()

print(f"Root Mean Squared Error: {rmse}")

r2 = r2_score(y_test, y_pred)

print(f"R-squared: {r2}")

Mean Squared Error: 9.34871977
Root Mean Squared Error: 3.0575672867835966
R-squared: -0.12840000288824716




## Random Forest

In [320]:
# Classify the clothes colour from the features plus the pants colour (y_pants
# is appended to X as an extra input). Half of the rows are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(pd.concat([X,y_pants], axis=1), y_clothes, test_size=0.5, random_state=42)
# Initialize Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

print("\nClassification Report:")
# zero_division=0 reports 0 for classes that are never predicted - the same
# value as the default - without emitting an UndefinedMetricWarning per metric.
print(classification_report(y_test, y_pred, zero_division=0))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.33

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       499
           1       0.00      0.00      0.00       615
           2       0.00      0.00      0.00      1347
           3       0.00      0.00      0.00       790
           4       0.33      1.00      0.49      4102
           5       0.00      0.00      0.00       820
           6       0.00      0.00      0.00       527
           7       0.00      0.00      0.00       635
           8       0.00      0.00      0.00      1284
           9       0.00      0.00      0.00      1173
          10       0.00      0.00      0.00       163
          11       0.00      0.00      0.00       158
          12       0.00      0.00      0.00       387

    accuracy                           0.33     12500
   macro avg       0.03      0.08      0.04     12500
weighted avg       0.11      0.33      0.16     12500


Confusion Matrix:
[[   0    0    0    0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Linear Regression

In [321]:
# Hold out 20% of the rows; the encoded pants colour is the regression target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_pants, test_size=0.2, random_state=42
)

# Fit a linear regression on the training split (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and score the predictions.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the scores followed by the raw predictions.
for label, value in (
    ("Mean Squared Error", mse),
    ("R-squared Score", r2),
    ("Predictions", y_pred),
):
    print(f"{label}: {value}")

Mean Squared Error: 8.318377537572399
R-squared Score: -0.0018192319419585434
Predictions: [5.0731277  5.07118444 5.07298456 ... 5.08082301 5.09714569 5.04493548]


## SVM

In [322]:
# Linear SVM that classifies the encoded clothes colour; 20% of rows held out.
X_train, X_test, y_train, y_test = train_test_split(X, y_clothes, test_size=0.2, random_state=42)

# Standardise the features; the scaler is fitted on the training split only and
# then applied unchanged to the test split.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(X_train)
x_test_scaled = scaler.transform(X_test)

# Fixed random_state so repeated runs give the same model, in line with the
# seeded splits and forests elsewhere in this notebook.
linear_svm_model = LinearSVC(random_state=42)
linear_svm_model.fit(x_train_scaled, y_train)

y_pred_linear_svm = linear_svm_model.predict(x_test_scaled)

accuracy_linear_svm = accuracy_score(y_test, y_pred_linear_svm)
# Set average to 'weighted' for multiclass classification.
# zero_division=0 scores classes that are never predicted as 0 (the value the
# default also yields) without raising an UndefinedMetricWarning.
precision_linear_svm = precision_score(y_test, y_pred_linear_svm, average='weighted', zero_division=0)
recall_linear_svm = recall_score(y_test, y_pred_linear_svm, average='weighted', zero_division=0)
f1_linear_svm = f1_score(y_test, y_pred_linear_svm, average='weighted', zero_division=0)
confusion_matrix_linear_svm = confusion_matrix(y_test, y_pred_linear_svm)
print("Accuracy:", accuracy_linear_svm)
print("Precision:", precision_linear_svm)
print("Recall :", recall_linear_svm)
print("F1 Score:", f1_linear_svm)
print("Confusion Matrix:\n", confusion_matrix_linear_svm)

Accuracy: 0.3354
Precision: 0.11249316
Recall : 0.3354
F1 Score: 0.16847859817283212
Confusion Matrix:
 [[   0    0    0    0  188    0    0    0    0    0    0    0    0]
 [   0    0    0    0  234    0    0    0    0    0    0    0    0]
 [   0    0    0    0  543    0    0    0    0    0    0    0    0]
 [   0    0    0    0  313    0    0    0    0    0    0    0    0]
 [   0    0    0    0 1677    0    0    0    0    0    0    0    0]
 [   0    0    0    0  335    0    0    0    0    0    0    0    0]
 [   0    0    0    0  201    0    0    0    0    0    0    0    0]
 [   0    0    0    0  243    0    0    0    0    0    0    0    0]
 [   0    0    0    0  499    0    0    0    0    0    0    0    0]
 [   0    0    0    0  475    0    0    0    0    0    0    0    0]
 [   0    0    0    0   65    0    0    0    0    0    0    0    0]
 [   0    0    0    0   72    0    0    0    0    0    0    0    0]
 [   0    0    0    0  155    0    0    0    0    0    0    0    0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [323]:
# Feed-forward neural network that classifies the encoded clothes colour.

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable between runs.
keras.utils.set_random_seed(42)

# 1. Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y_clothes, test_size=0.2, random_state=42)

# 2. Scale the features using StandardScaler (fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Define the ANN model architecture
model = keras.Sequential([
    keras.layers.Input(shape=(X_train_scaled.shape[1],)),  # One input per feature column
    keras.layers.Dense(64, activation='relu'),  # Hidden layer with 64 neurons and ReLU activation
    keras.layers.Dense(32, activation='relu'),  # Another hidden layer with 32 neurons and ReLU activation
    keras.layers.Dense(y_clothes.nunique(), activation='softmax')  # One softmax output per clothes-colour class
])

# 4. Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # Use sparse_categorical_crossentropy for integer labels
              metrics=['accuracy'])

# 5. Train the model
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32)  # Adjust epochs and batch_size as needed

# 6. Make predictions
y_pred = model.predict(X_test_scaled)
y_pred_labels = y_pred.argmax(axis=1)  # Class with the highest predicted probability

# 7. Evaluate the model
# zero_division=0 scores classes that are never predicted as 0 (the value the
# default also yields) without raising an UndefinedMetricWarning.
accuracy = accuracy_score(y_test, y_pred_labels)
precision = precision_score(y_test, y_pred_labels, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred_labels, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred_labels, average='weighted', zero_division=0)
confusion_mat = confusion_matrix(y_test, y_pred_labels)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", confusion_mat)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.3222 - loss: 2.2727
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.3247 - loss: 2.2105
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.3194 - loss: 2.2129
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3200 - loss: 2.2137
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.3263 - loss: 2.2025
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3187 - loss: 2.2105
Epoch 7/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3227 - loss: 2.2019
Epoch 8/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3213 - loss: 2.2064
Epoch 9/10
[1m625/625[0m [32m━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
