In [15]:
import os
from pathlib import Path

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [17]:
# Load datasets
# The input directory can be overridden with the CUSTOMER_DATA_DIR environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset the original location is used.
DATA_DIR = Path(os.environ.get("CUSTOMER_DATA_DIR", r"C:\Users\abhis\Downloads"))

customers = pd.read_csv(DATA_DIR / "Customers.csv")
transactions = pd.read_csv(DATA_DIR / "Transactions.csv")
products = pd.read_csv(DATA_DIR / "Products.csv")

In [27]:
# Merge datasets on customer ID (pd.merge defaults to an inner join, so only
# CustomerIDs present in both frames are kept)
data = pd.merge(customers, transactions, on='CustomerID')

# Convert date columns to datetime
data['SignupDate'] = pd.to_datetime(data['SignupDate'])
data['TransactionDate'] = pd.to_datetime(data['TransactionDate'])

# Extract relevant features from dates
data['SignupYear'] = data['SignupDate'].dt.year
data['SignupMonth'] = data['SignupDate'].dt.month
data['TransactionYear'] = data['TransactionDate'].dt.year
data['TransactionMonth'] = data['TransactionDate'].dt.month

# Drop original date columns (reassign instead of mutating in place)
data = data.drop(columns=['SignupDate', 'TransactionDate'])

# Identify non-numeric columns
non_numeric_columns = data.select_dtypes(include=['object']).columns

# NOTE(review): every object column is treated as a categorical feature. If
# identifier columns such as CustomerID are stored as strings they are one-hot
# encoded too, which biases neighbours toward rows of the same customer --
# confirm whether identifiers should be excluded from the feature set.

# Convert categorical columns to string so the encoder sees a single type
# (missing values become the literal string 'nan' here)
data[non_numeric_columns] = data[non_numeric_columns].astype(str)

# Preprocess data: handle missing values (if any). Because the categorical
# columns were already stringified above, this only fills the remaining
# (numeric) columns.
data = data.fillna(0)

# Everything that is not categorical is scaled as a numeric feature
numeric_columns = data.drop(columns=non_numeric_columns).columns

# Scale numeric columns and one-hot encode the non-numeric ones
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), non_numeric_columns)
    ]
)

# Apply preprocessing
data_processed = preprocessor.fit_transform(data)

# Convert sparse matrix to dense. ColumnTransformer only returns a sparse
# matrix when the stacked output's density is below its sparse_threshold;
# otherwise the result is already a dense ndarray, which has no .toarray().
if hasattr(data_processed, 'toarray'):
    data_processed = data_processed.toarray()

# Fit Nearest Neighbors model
nn_model = NearestNeighbors(n_neighbors=3, algorithm='auto')
nn_model.fit(data_processed)

def find_similar_customers(user_info):
    """Return the CustomerIDs of the rows of ``data`` nearest to ``user_info``.

    The record is put through the same steps as the training frame (date
    parts extracted, categorical columns stringified, missing values filled
    with 0), transformed with the fitted module-level ``preprocessor`` and
    looked up in the fitted ``nn_model``.

    Parameters
    ----------
    user_info : dict
        One record keyed by column name. It must contain 'SignupDate',
        'TransactionDate' and every column in ``non_numeric_columns``.

    Returns
    -------
    numpy.ndarray
        The 'CustomerID' values of the neighbouring rows, in the order
        returned by ``nn_model.kneighbors``. Neighbours are rows of ``data``,
        so the same CustomerID can appear more than once.

    Raises
    ------
    KeyError
        If ``user_info`` lacks a date field or a categorical column.
    """
    # Wrap the record in a list so the DataFrame has exactly one row (2D input)
    user_df = pd.DataFrame([user_info])

    # Fail early with a readable message instead of a bare pandas KeyError
    required_fields = ['SignupDate', 'TransactionDate', *non_numeric_columns]
    missing_fields = [name for name in required_fields if name not in user_df.columns]
    if missing_fields:
        raise KeyError(f'user_info is missing required fields: {missing_fields}')

    # Convert date columns to datetime
    user_df['SignupDate'] = pd.to_datetime(user_df['SignupDate'])
    user_df['TransactionDate'] = pd.to_datetime(user_df['TransactionDate'])

    # Extract relevant features from dates
    user_df['SignupYear'] = user_df['SignupDate'].dt.year
    user_df['SignupMonth'] = user_df['SignupDate'].dt.month
    user_df['TransactionYear'] = user_df['TransactionDate'].dt.year
    user_df['TransactionMonth'] = user_df['TransactionDate'].dt.month

    # Drop original date columns
    user_df = user_df.drop(columns=['SignupDate', 'TransactionDate'])

    # Convert categorical columns to string
    user_df[non_numeric_columns] = user_df[non_numeric_columns].astype(str)

    # Mirror the training-time fillna(0): without it a missing numeric or date
    # value would reach kneighbors as NaN and raise.
    user_df = user_df.fillna(0)

    # Preprocess the user_info using the same preprocessor
    user_processed = preprocessor.transform(user_df)
    # The transformer may return either a sparse matrix or a dense ndarray
    if hasattr(user_processed, 'toarray'):
        user_processed = user_processed.toarray()

    # Find the nearest neighbours (count is set on nn_model); distances unused
    _, indices = nn_model.kneighbors(user_processed)

    # indices[0] holds row positions in the fitted matrix, which lines up
    # positionally with the rows of `data`
    recommended_customers = data.iloc[indices[0]]['CustomerID'].values
    return recommended_customers

# Example query record -- swap in real values before relying on the result.
user_info = dict(
    # Identity and signup details
    CustomerID=12345,  # Replace with a valid CustomerID from your dataset
    CustomerName='John Doe',
    SignupDate='2022-01-01',
    # Transaction details
    ProductID=101,
    Quantity=2,
    Region='North',
    TransactionID=7890,
    TotalValue=500,
    Price=250,
    # Additional profile fields
    age=30,
    income=60000,
    gender='Male',
    location='Kalyan',
    purchase_frequency=10,
    total_spent=2000,
    TransactionDate='2022-01-01',
)

# Look up the nearest rows for this record and show their customer IDs
similar_customers = find_similar_customers(user_info)
print(f'Similar customers: {similar_customers}')


Similar customers: ['C0081' 'C0040' 'C0154']
