# Portfolio Alchemy

In [50]:
import pandas as pd
data = pd.read_csv("/Users/neerajkumar/Downloads/projecttt_datasets/companies_share_price.csv")

In [51]:
data.head()

Unnamed: 0,Name,Market Cap,Alpha,Beta,5Y CAGR,D/E,EPS (Q)
0,RELIANCE,1645671.87,-4.86,1.0,12.69,0.5,13.7
1,TCS,1414726.49,-4.4,0.61,12.27,0.09,34.21
2,HDFCBANK,1298771.0,-5.15,1.1,6.46,0.0,23.11
3,BHARTIARTL,1026583.48,23.02,0.64,26.47,2.04,25.54
4,ICICIBANK,882070.18,7.85,1.27,18.21,0.0,18.26


In [52]:
data = data.apply(lambda col: col.map(lambda x: '' if isinstance(x, str) and x.strip() == '-' else x))

In [53]:
data = data.replace("", pd.NA)

In [54]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        500 non-null    object 
 1   Market Cap  500 non-null    object 
 2   Alpha       473 non-null    object 
 3   Beta        448 non-null    object 
 4   5Y CAGR     418 non-null    object 
 5   D/E         500 non-null    float64
 6   EPS (Q)     500 non-null    float64
dtypes: float64(2), object(5)
memory usage: 27.5+ KB


In [55]:
data.isnull().sum()

Name           0
Market Cap     0
Alpha         27
Beta          52
5Y CAGR       82
D/E            0
EPS (Q)        0
dtype: int64

# Data Cleaning

In [56]:
data["Alpha"] = data["Alpha"].fillna("X")

In [58]:
data["Beta"] = data["Beta"].fillna("X")

In [60]:
data["5Y CAGR"] = data["5Y CAGR"].fillna("X")

# Model Building

In [84]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

In [72]:
def market_cap_rating(x):
    x = float(str(x).replace(",", ""))  # Convert to float after removing commas
    return 5 if x >= 1000000 else 4 if x >= 400000 else 3 if x >= 80000 else 2 if x >= 16000 else 1


In [73]:
def alpha_rating(x):
    if x == "X":
        return 3
    x = float(x)  # Convert to float only if not "X"
    return 5 if x >= 5 else 4 if x >= 3 else 3 if x >= 0 else 2 if x >= -3 else 1

def beta_rating(x):
    if x == "X":
        return 3
    x = float(x)  
    return 5 if x <= 0.5 else 4 if x <= 1 else 3 if x <= 1.5 else 2 if x <= 2 else 1

def cagr_rating(x):
    if x == "X":
        return 3
    x = float(x)  
    return 5 if x >= 20 else 4 if x >= 15 else 3 if x >= 10 else 2 if x >= 5 else 1


In [74]:
def de_rating(x):
    x = float(x)  # Convert to float
    return 5 if x <= 0.5 else 4 if x <= 1 else 3 if x <= 2 else 2 if x <= 3 else 1

def eps_rating(x):
    x = float(x)  
    return 5 if x >= 5 else 4 if x >= 3 else 3 if x >= 1 else 2 if x > 0 else 1


In [75]:
data["Market_Cap_Rating"] = data["Market Cap"].apply(market_cap_rating)
data["Alpha_Rating"] = data["Alpha"].apply(alpha_rating)
data["Beta_Rating"] = data["Beta"].apply(beta_rating)
data["CAGR_Rating"] = data["5Y CAGR"].apply(cagr_rating)
data["D/E_Rating"] = data["D/E"].apply(de_rating)
data["EPS_Rating"] = data["EPS (Q)"].apply(eps_rating)


In [78]:
# Define Capability Categories Based on Ratings
def define_capability(row):
    avg_rating = np.mean([
        row["Market_Cap_Rating"],
        row["Alpha_Rating"],
        row["Beta_Rating"],
        row["CAGR_Rating"],
        row["D/E_Rating"],
        row["EPS_Rating"]
    ])
    if avg_rating >= 4.5:
        return "Safe"
    elif avg_rating >= 3.5:
        return "Moderate"
    elif avg_rating >= 2.5:
        return "Risky"
    else:
        return "High Risky"

data["Capability"] = data.apply(define_capability, axis=1)


In [81]:
# Prepare Data for Model Training
X = data[["Market_Cap_Rating", "Alpha_Rating", "Beta_Rating", "CAGR_Rating", "D/E_Rating", "EPS_Rating"]]
y = data["Capability"]

# Encode Target Variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [82]:
# Train Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)


In [85]:
# Predictions
y_pred = rf_model.predict(X_test)

# Convert Predictions Back to Labels
y_pred_labels = label_encoder.inverse_transform(y_pred)

# Model Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.88

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.44      0.62         9
           1       0.96      0.91      0.93        55
           2       0.74      0.94      0.83        31
           3       1.00      1.00      1.00         5

    accuracy                           0.88       100
   macro avg       0.93      0.82      0.84       100
weighted avg       0.90      0.88      0.88       100



In [86]:
print(data)

           Name    Market Cap   Alpha   Beta 5Y CAGR    D/E  EPS (Q)  \
0      RELIANCE  16,45,671.87   -4.86      1   12.69   0.50    13.70   
1           TCS  14,14,726.49    -4.4   0.61   12.27   0.09    34.21   
2      HDFCBANK  12,98,771.00   -5.15    1.1    6.46   0.00    23.11   
3    BHARTIARTL  10,26,583.48   23.02   0.64   26.47   2.04    25.54   
4     ICICIBANK   8,82,070.18    7.85   1.27   18.21   0.00    18.26   
5          INFY   7,63,490.30   -3.65   0.81    18.4   0.09    16.43   
6          SBIN   6,49,399.98    2.64    1.3   17.32   0.00    21.12   
7    HINDUNILVR   5,45,457.61   -4.55   0.31    0.33   0.03    12.70   
8    BAJFINANCE   5,20,752.38   -2.76   1.89   11.89   3.82    68.63   
9           ITC   5,12,636.29   18.37   0.61   15.37   0.00     3.74   
10         LICI   4,96,385.82   -7.56      X       X   0.00    17.40   
11      HCLTECH   4,61,585.97    7.68    0.9    22.7   0.08    16.94   
12           LT   4,48,808.99    9.96    1.1   20.36   1.13    2