In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px

In [2]:
df = pd.read_csv('personality_dataset.csv')
df.head()

Unnamed: 0,Time_spent_Alone,Stage_fear,Social_event_attendance,Going_outside,Drained_after_socializing,Friends_circle_size,Post_frequency,Personality
0,Number of hours an individual typically spends...,Whether the person experiences stage fear,Frequency (scale 0-10) of attending social eve...,How often the individual goes outside (scale 0...,Whether the individual feels drained after soc...,Number of close friends,Frequency of posting on social media,Target variable: Introvert or Extrovert.
1,4,No,4,6,No,13,5,Extrovert
2,9,Yes,0,0,Yes,0,3,Introvert
3,9,Yes,1,2,Yes,5,2,Introvert
4,0,No,6,7,No,14,8,Extrovert


In [20]:
def load_and_clean_data():
    """Load dan bersihkan dataset personality dengan handling missing values"""
    try:
        # Load data dengan skip baris deskripsi jika ada
        df = pd.read_csv('personality_dataset.csv', skiprows=1)

        # Bersihkan nama kolom
        df.columns = ['Time_spent_Alone', 'Stage_fear', 'Social_event_attendance',
                      'Going_outside', 'Drained_after_socializing', 'Friends_circle_size',
                      'Post_frequency', 'Personality']

        # Konversi kolom numerik dan handle missing values
        numeric_cols = ['Time_spent_Alone', 'Social_event_attendance', 'Going_outside',
                       'Friends_circle_size', 'Post_frequency']

        for col in numeric_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
            df[col].fillna(df[col].median(), inplace=True)

        # Handle missing values untuk kolom kategorikal
        df['Stage_fear'].fillna(df['Stage_fear'].mode()[0], inplace=True)
        df['Drained_after_socializing'].fillna(df['Drained_after_socializing'].mode()[0], inplace=True)

        return df

    except Exception as e:
        st.error(f"Error loading data: {str(e)}")
        return None

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2901 entries, 0 to 2900
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   Time_spent_Alone           2838 non-null   object
 1   Stage_fear                 2828 non-null   object
 2   Social_event_attendance    2839 non-null   object
 3   Going_outside              2835 non-null   object
 4   Drained_after_socializing  2849 non-null   object
 5   Friends_circle_size        2824 non-null   object
 6   Post_frequency             2836 non-null   object
 7   Personality                2901 non-null   object
dtypes: object(8)
memory usage: 181.4+ KB


In [4]:
df_cleaned = df.drop(index=0).copy()

In [5]:
numeric_columns = [
    "Time_spent_Alone",
    "Social_event_attendance",
    "Going_outside",
    "Friends_circle_size",
    "Post_frequency"
]
df_cleaned[numeric_columns] = df_cleaned[numeric_columns].apply(pd.to_numeric, errors='coerce')

In [6]:
df_cleaned["Stage_fear"].value_counts(), df_cleaned["Drained_after_socializing"].value_counts(), df_cleaned["Personality"].value_counts()

(Stage_fear
 No     1417
 Yes    1410
 Name: count, dtype: int64,
 Drained_after_socializing
 No     1441
 Yes    1407
 Name: count, dtype: int64,
 Personality
 Extrovert    1491
 Introvert    1409
 Name: count, dtype: int64)

# **Visualisasi 1: Hubungan antara Stage Fear dan Tipe Kepribadian**

In [10]:
personality_counts = df_cleaned['Personality'].value_counts()

colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57', '#FF9FF3']

# Membuat Pie Chart dengan Plotly
fig_pie = go.Figure(data=[go.Pie(
    labels=personality_counts.index,
    values=personality_counts.values,
    hole=0.4,
    marker=dict(colors=colors[:len(personality_counts)], line=dict(color='#FFFFFF', width=2)),
    textinfo='label+percent+value',
    textfont=dict(size=14),
    showlegend=True
)])

fig_pie.update_layout(
    title={
        'text': "Distribusi Tipe Kepribadian dalam Dataset",
        'x': 0.5,
        'font': {'size': 20}
    },
    height=450,

    legend=dict(
        title="Tipe Kepribadian"
    )
)

fig_pie.show()

🎯 Visualisasi 1: Distribusi Tipe Kepribadian


**Analisis dan Insight**

- Tujuan Visualisasi:
Menunjukkan komposisi dan proporsi setiap tipe kepribadian dalam dataset untuk memahami representasi sampel penelitian.

- Insight yang Ditunjukkan:

  - Extrovert: 1491 orang (51.4%)

  - Introvert: 1409 orang (48.6%)

  - Tipe Dominan: Extrovert

# **Visualisasi 2: Rata-rata Waktu Sendirian Berdasarkan Kepribadian**

In [26]:
numeric_columns = [
    "Time_spent_Alone",
    "Social_event_attendance",
    "Going_outside",
    "Friends_circle_size",
    "Post_frequency"
]
df_cleaned[numeric_columns] = df_cleaned[numeric_columns].apply(pd.to_numeric, errors='coerce')

for col in numeric_columns:

    df_cleaned[col].fillna(df_cleaned[col].median(), inplace=True)

fig_scatter = px.scatter(
    df_cleaned,
    x='Time_spent_Alone',
    y='Social_event_attendance',
    color='Personality',
    size='Friends_circle_size',
    hover_data=['Going_outside', 'Post_frequency'],
    color_discrete_sequence=px.colors.qualitative.Vivid,
    title="Korelasi: Waktu Sendiri vs Kehadiran Acara Sosial",
    labels={
        'Time_spent_Alone': 'Waktu Sendiri (jam/hari)',
        'Social_event_attendance': 'Kehadiran Acara Sosial (skala)',
        'Friends_circle_size': 'Ukuran Lingkaran Pertemanan',
        'Personality': 'Tipe Kepribadian'
    }
)

fig_scatter.update_layout(
    height=500,
    xaxis=dict(gridcolor='lightgray', gridwidth=0.5),
    yaxis=dict(gridcolor='lightgray', gridwidth=0.5)
)

# Menampilkan grafik
fig_scatter.show()


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





**Analisis dan Insight**
- TUJUAN VISUALISASI:
Menganalisis hubungan antara preferensi waktu sendiri dengan tingkat partisipasi sosial.

- INSIGHT YANG DITUNJUKKAN:
    - Pola hubungan (cenderung negatif) antara waktu sendiri dan aktivitas sosial.
    - Pengelompokan (clustering) berdasarkan tipe kepribadian terlihat dari warna.
    - Ukuran lingkaran pertemanan direpresentasikan oleh ukuran titik."

# **Visualisasi 3: Ukuran Lingkaran Pertemanan terhadap Postingan Sosial Media**

In [28]:
radar_cols = ['Time_spent_Alone', 'Social_event_attendance', 'Going_outside', 'Friends_circle_size', 'Post_frequency']
personality_profiles = df_cleaned.groupby('Personality')[radar_cols].mean()

# Label untuk sumbu radar chart
categories = ['Waktu Sendiri', 'Acara Sosial', 'Intensitas Keluar', 'Ukuran Pertemanan', 'Frekuensi Posting']

# Inisialisasi gambar
fig_radar = go.Figure()

# Loop melalui setiap profil kepribadian untuk ditambahkan ke chart
for personality, profile in personality_profiles.iterrows():
    fig_radar.add_trace(go.Scatterpolar(
        r=profile.values.tolist(),
        theta=categories,
        fill='toself',
        name=personality
    ))

# Mengupdate layout/tampilan grafik
fig_radar.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, personality_profiles.max().max() * 1.1]
    )),
  showlegend=True,
  title="Profil Karakteristik Rata-rata per Tipe Kepribadian",
  height=550
)

fig_radar.show()


🕸️ Membuat Visualisasi 3: Profil Radar Karakteristik


**Analisis dan Insight**

- TUJUAN VISUALISASI:
Membandingkan profil karakteristik dari berbagai tipe kepribadian secara visual untuk melihat kekuatan dan kelemahan relatif mereka pada dimensi yang berbeda.
- INSIGHT YANG DITUNJUKKAN:
    -  Perbedaan Pola: Tipe kepribadian mana yang lebih cenderung menghabiskan waktu sendiri vs. bersosialisasi.
    -  Outlier: Apakah ada tipe kepribadian dengan ukuran pertemanan yang jauh lebih besar atau kecil?
    - Keseimbangan: Bagaimana setiap tipe menyeimbangkan antara aktivitas online (posting) dan offline (keluar rumah).