In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage

In [None]:
#Load the dataset
# Path of the CSV file to read.
file_path = '/mnt/data/List of most-followed Instagram accounts 2.csv'
# Parse the CSV into the DataFrame `df`.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
#Preprocess the data
# Build the cleaned frame in one chain rather than mutating the loaded frame
# in place:
#   1. shorten the raw column headers,
#   2. coerce Followers to numeric (unparseable values become NaN),
#   3. drop rows that lack a follower count or a profession,
#   4. take an explicit copy so the column assignments below write to an
#      independent frame instead of a filtered result (avoids
#      SettingWithCopyWarning).
# Re-running this cell is safe: rename ignores headers that are already renamed.
df = (
    df.rename(columns={
        'Followers(millions)[2]': 'Followers',
        'Profession/Activity': 'Profession',
        'Country/Continent': 'Country'
    })
    .assign(Followers=lambda frame: pd.to_numeric(frame['Followers'], errors='coerce'))
    .dropna(subset=['Followers', 'Profession'])
    .copy()
)

# Keep only the first listed profession: cut at the first comma or at the
# standalone word "and". The word boundary stops the pattern from firing
# inside longer words that merely start with "and", and \s+ also accepts
# repeated or non-breaking whitespace before it.
df['Primary_Profession'] = (
    df['Profession']
    .str.split(r',|\s+and\b')
    .str[0]
    .str.strip()
)

# Collapse specific professions into broader groups. Labels that are not keys
# of this mapping are left unchanged by `replace`.
profession_map = {
    'Footballer': 'Athlete', 'Wrestler': 'Athlete', 'Boxer': 'Athlete',
    'Cricketer': 'Athlete', 'Tennis player': 'Athlete', 'Basketball player': 'Athlete',
    'Model': 'Model', 'Musician': 'Musician', 'Rapper': 'Musician',
    'Singer': 'Musician', 'DJ': 'Musician', 'Actress': 'Acting',
    'Actor': 'Acting', 'Television personality': 'Media Personality',
    'Social media personality': 'Media Personality',
    'Social media platform': 'Platform', 'Businesswoman': 'Entrepreneur',
    'Businessman': 'Entrepreneur'
}
df['Primary_Profession'] = df['Primary_Profession'].replace(profession_map)

In [None]:
#Encode and scale
# Ward linkage needs at least two observations; fail early with a clear message.
if len(df) < 2:
    raise ValueError('Hierarchical clustering needs at least two accounts after preprocessing.')

# Integer codes are kept on the frame as a compact profession id, but they are
# NOT used as a clustering feature: the codes follow the sorted order of the
# labels, so a Euclidean method would treat that arbitrary order as distance.
le = LabelEncoder()
df['Profession_Code'] = le.fit_transform(df['Primary_Profession'])

# One 0/1 indicator column per profession, so any two different professions
# are equally far apart.
profession_dummies = pd.get_dummies(df['Primary_Profession'], prefix='Profession', dtype=float)

# Standardise only the continuous follower count. The indicators stay at 0/1:
# standardising them would inflate the weight of rare professions.
scaler = StandardScaler()
followers_scaled = pd.DataFrame(
    scaler.fit_transform(df[['Followers']]),
    columns=['Followers'],
    index=df.index,
)
features = pd.concat([followers_scaled, profession_dummies], axis=1)
X_scaled = features.to_numpy()

#Hierarchical clustering
linked = linkage(X_scaled, method='ward')

#Save dendrogram
# Use an explicit figure/axes pair so the plot does not depend on pyplot's
# implicit "current figure" state.
fig, ax = plt.subplots(figsize=(12, 6))
dendrogram(
    linked,
    labels=df['Owner'].astype(str).to_numpy(),  # str() keeps missing owners printable
    leaf_rotation=90,
    leaf_font_size=8,
    ax=ax,
)
ax.set_title('Hierarchical Clustering Dendrogram of Instagram Accounts')
ax.set_xlabel('Account Owner')
ax.set_ylabel('Distance')
dendrogram_path = '/mnt/data/hierarchical_dendrogram_instagram.png'
fig.tight_layout()
fig.savefig(dendrogram_path)
# Close the figure after saving so it is released and not rendered inline.
plt.close(fig)

# Show the saved file's path as the cell output.
dendrogram_path