In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
pip install flask flask-socketio pandas numpy scikit-learn transformers joblib aif360

Collecting flask-socketio
  Downloading Flask_SocketIO-5.5.1-py3-none-any.whl.metadata (2.6 kB)
Collecting aif360
  Downloading aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Collecting python-socketio>=5.12.0 (from flask-socketio)
  Downloading python_socketio-5.12.1-py3-none-any.whl.metadata (3.2 kB)
Collecting bidict>=0.21.0 (from python-socketio>=5.12.0->flask-socketio)
  Downloading bidict-0.23.1-py3-none-any.whl.metadata (8.7 kB)
Collecting python-engineio>=4.11.0 (from python-socketio>=5.12.0->flask-socketio)
  Downloading python_engineio-4.11.2-py3-none-any.whl.metadata (2.2 kB)
Collecting simple-websocket>=0.10.0 (from python-engineio>=4.11.0->python-socketio>=5.12.0->flask-socketio)
  Downloading simple_websocket-1.1.0-py3-none-any.whl.metadata (1.5 kB)
Collecting wsproto (from simple-websocket>=0.10.0->python-engineio>=4.11.0->python-socketio>=5.12.0->flask-socketio)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading Flask_SocketIO-5.5.1-py3-none-any

In [3]:
import os
import pandas as pd
import numpy as np
from flask import Flask, request, jsonify, render_template
from flask_socketio import SocketIO
from transformers import pipeline
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from aif360.algorithms.preprocessing import Reweighing
from aif360.datasets import StandardDataset
import joblib

# Initialize Flask App
app = Flask(__name__)
socketio = SocketIO(app)

# Data Processing Class
class DataProcessor:
    def __init__(self, file1, file2, file3, file4):
        self.df1 = pd.read_csv(file1)
        self.df2 = pd.read_csv(file2)
        self.df3 = pd.read_csv(file3)
        self.df4 = pd.read_csv(file4)
        self.df = pd.concat([self.df1, self.df2, self.df3, self.df4], ignore_index=True)
        self.label_encoders = {}
        self.scaler = MinMaxScaler()

    def encode_features(self):
        categorical_columns = ['Industry', 'Financial Needs', 'Preferences', 'Gender', 'Location', 'Interest', 'Education', 'Occupation', 'Platform', 'Transaction Type', 'Category', 'Payment_mode']
        for col in categorical_columns:
            if col in self.df.columns:
                le = LabelEncoder()
                self.df[col] = le.fit_transform(self.df[col].astype(str))
                self.label_encoders[col] = le

    def normalize_features(self):
        numeric_cols = ['Age', 'Incomeper year(in dollar)', 'Revenue(in dollars)', 'Amount(in Dollar)', 'Sentiment_Score']
        for col in numeric_cols:
            if col in self.df.columns:
                self.df[col].fillna(self.df[col].median(), inplace=True)  # Handle NaN values
        self.df[numeric_cols] = self.scaler.fit_transform(self.df[numeric_cols])

    def process(self):
        self.encode_features()
        self.normalize_features()
        return self.df

# Sentiment Analysis Class
class SentimentAnalyzer:
    def __init__(self):
        self.model = pipeline("sentiment-analysis")

    def analyze(self, text):
        return self.model(text)[0]['label'] if pd.notna(text) else 'neutral'

# Recommendation Model Class
class RecommendationSystem:
    def __init__(self, df):
        self.df = df
        self.model = NearestNeighbors(n_neighbors=5, metric='cosine')

    def train(self):
        feature_cols = ['Age', 'Incomeper year(in dollar)', 'Revenue(in dollars)', 'Amount(in Dollar)', 'Sentiment_Score']
        if all(col in self.df.columns for col in feature_cols):
            self.df[feature_cols] = self.df[feature_cols].fillna(self.df[feature_cols].median())  # Handle NaN values
            self.model.fit(self.df[feature_cols])
            joblib.dump(self.model, 'recommendation_model.pkl')

    def load_model(self):
        if os.path.exists('recommendation_model.pkl'):
            self.model = joblib.load('recommendation_model.pkl')

    def get_recommendations(self, user_id):
        if user_id not in self.df['Customer_Id'].values:
            return []
        user_data = self.df[self.df['Customer_Id'] == user_id].iloc[:, :-1]
        distances, indices = self.model.kneighbors(user_data)
        return self.df.iloc[indices[0]]['Product_Id'].tolist()



# Bias Detection Class
class BiasDetector:
    def __init__(self, df):
        self.df = df.copy()

        # Encode Gender: Male → 1, Female → 0
        if 'Gender' in self.df.columns:
            self.df['Gender'] = self.df['Gender'].map({'Male': 1, 'Female': 0})

        # Define privileged (Male) & unprivileged (Female) groups
        privileged_groups = [{'Gender': 1}]
        unprivileged_groups = [{'Gender': 0}]

        # Ensure no NaN values
        self.df.fillna({'Gender': 0}, inplace=True)

        # Create StandardDataset
        self.dataset = StandardDataset(
            self.df,
            label_name='Sentiment_Label',
            protected_attribute_names=['Gender'],
            favorable_classes=['positive'],  # Adjust this based on actual labels
            privileged_classes=[[1]],  # Male = 1
        )

        self.bias_model = Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)

    def detect_bias(self):
        return self.bias_model.fit_transform(self.dataset)


# Load and process data
data_processor = DataProcessor('/content/drive/MyDrive/Gen_Ai_demo/Customer_Profile_Org.csv', '/content/drive/MyDrive/Gen_Ai_demo/Customer_Profile_Individual.csv', '/content/drive/MyDrive/Gen_Ai_demo/Social_Media_Sentiment.csv', '/content/drive/MyDrive/Gen_Ai_demo/Transaction_History.csv')
df = data_processor.process()

# Apply sentiment analysis
sentiment_analyzer = SentimentAnalyzer()
df['Sentiment_Label'] = df['Content'].apply(lambda x: sentiment_analyzer.analyze(x))

# Train and load recommendation model
recommender = RecommendationSystem(df)
recommender.train()
recommender.load_model()

# Detect bias
bias_detector = BiasDetector(df)
adjusted_dataset = bias_detector.detect_bias()

@app.route('/')
def home():
    return render_template('dashboard.html')

@app.route('/recommend', methods=['POST'])
def recommend():
    user_id = request.json['Customer_Id']
    recommendations = recommender.get_recommendations(user_id)
    socketio.emit('update_recommendations', {'recommendations': recommendations})
    return jsonify({'recommendations': recommendations})


if __name__ == '__main__':
    socketio.run(app, host="0.0.0.0", port=5000, debug=True, allow_unsafe_werkzeug=True)




pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[inFairness]'
pip install 'aif360[Reductions]'
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  self.df[col].fillna(self.df[col].median(), inplace=True)  # Handle NaN values
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu
  self.w_p_fav = n_fav*n_p / (n*n_p_fav)
  self.w_p_unfav = n_unfav*n_p / (n*n_p_unfav)
  self.w_up_fav = n_fav*n_up / (n*n_up_fav)
  self.w_up_unfav = n_unfav*n_up / (n*n_up_unfav)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat


In [None]:
flask run --host=0.0.0.0 --port=5000