## Preprocessing Data

In [1]:
# import dependencies 
import pandas as pd
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [2]:
# define function to find sun sign and add in a column 

def get_sun_sign(date):
    day = date.day
    month = date.month_name()
    
    if month == 'December':
        sun_sign = 'Sagittarius' if (day < 22) else 'Capricorn'
    elif month == 'January':
        sun_sign = 'Capricorn' if (day < 20) else 'Aquarius'
    elif month == 'February':
        sun_sign = 'Aquarius' if (day < 19) else 'Pisces'
    elif month == 'March':
        sun_sign = 'Pisces' if (day < 21) else 'Aries'
    elif month == 'April':
        sun_sign = 'Aries' if (day < 20) else 'Taurus'
    elif month == 'May':
        sun_sign = 'Taurus' if (day < 21) else 'Gemini'
    elif month == 'June':
        sun_sign = 'Gemini' if (day < 21) else 'Cancer'
    elif month == 'July':
        sun_sign = 'Cancer' if (day < 23) else 'Leo'
    elif month == 'August':
        sun_sign = 'Leo' if (day < 23) else 'Virgo'
    elif month == 'September':
        sun_sign = 'Virgo' if (day < 23) else 'Libra'
    elif month == 'October':
        sun_sign = 'Libra' if (day < 23) else 'Scorpio'
    elif month == 'November':
        sun_sign = 'Scorpio' if (day < 22) else 'Sagittarius'
    else:
        sun_sign = None
        
    return sun_sign

In [3]:
# define function to find birth month

def get_birth_month(date):
    day = date.day
    month = date.month_name()
    
    if month == 'December':
        birth_month = 'December'
    elif month == 'January':
        birth_month = 'January'
    elif month == 'February':
        birth_month = 'February'
    elif month == 'March':
        birth_month = 'March'
    elif month == 'April':
        birth_month = 'April' 
    elif month == 'May':
        birth_month = 'May' 
    elif month == 'June':
        birth_month = 'June'
    elif month == 'July':
        birth_month = 'July'
    elif month == 'August':
        birth_month = 'August'
    elif month == 'September':
        birth_month = 'September'
    elif month == 'October':
        birth_month = 'October'
    elif month == 'November':
        birth_month = 'November'
    else:
        birth_month = None
        
    return birth_month

In [4]:
# read in csv file and save as dataframe
df_zodiac = pd.read_csv("MBTI_types_Birthdays.csv")


In [5]:
# check dataframe
df_zodiac.head()


Unnamed: 0,Timestamp,Birthday,Birthday.1,Zodiac,MBTI Type,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12
0,4/15/2020 4:26,10/5/2020,10/5,Libra,INFP,,,,,,,,
1,4/16/2020 17:02,1/1/2007,1/1,Capricorn,ENTP,,,,,,,,
2,4/17/2020 20:05,10/3/2006,10/3,Libra,INTP,,,,,,,,
3,4/15/2020 8:35,8/3/2006,8/3,Leo,INTP,,,,,,,,
4,4/14/2020 6:46,4/16/2006,4/16,Aries,ESFP,,,,,,,,


In [6]:
# remove unneccessary columns from DF
# df_zodiac = df_zodiac[['Timestamp', 'Birthday', 'MBTI Type']]
df_zodiac = df_zodiac.drop(columns=['Zodiac','Timestamp', 'Birthday.1', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9',
       'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12'])


In [7]:
# create a dataframe with MBTI types added as columns as columns, using get dummies
df_zodiac = pd.concat([df_zodiac, pd.get_dummies(df_zodiac['MBTI Type'])], axis=1)
df_zodiac.head()


Unnamed: 0,Birthday,MBTI Type,ENFJ,ENFP,ENTJ,ENTP,ESFJ,ESFP,ESTJ,ESTP,INFJ,INFP,INTJ,INTP,ISFJ,ISFP,ISTJ,ISTP
0,10/5/2020,INFP,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1,1/1/2007,ENTP,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,10/3/2006,INTP,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,8/3/2006,INTP,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,4/16/2006,ESFP,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [8]:
# translate birthday field to datetime format
df_zodiac['Birthday'] = pd.to_datetime(df_zodiac['Birthday'],infer_datetime_format=True)

# create column with zodiac signs in df_zodiac
df_zodiac['Sun_Sign'] = df_zodiac['Birthday'].apply(lambda X: get_sun_sign(X))

# add birth month to df
df_zodiac['Birth_Month'] = df_zodiac['Birthday'].apply(lambda X: get_birth_month(X))

In [9]:
# define function to convert MBTI types to 8 separate dimensions

MBTI_type_traits = {
    "INFP": [
        "Introverted",
        "Intuition",
        "Feeling",
        "Perceiving"
    ],
    "INTP": [
        "Introverted",
        "Intuition",
        "Thinking",
        "Perceiving"
    ],
    "INTJ": [
        "Introverted",
        "Intuition",
        "Thinking",
        "Judging"
    ],
    "INFJ": [
        "Introverted",
        "Intuition",
        "Feeling",
        "Judging"
    ],
    "ENTP": [
        "Extroverted",
        "Intuition",
        "Thinking",
        "Perceiving"
    ],
    "ENFJ": [
        "Extroverted",
        "Intuition",
        "Thinking",
        "Judging"
    ],
    "ENTJ": [
        "Extroverted",
        "Intuition",
        "Thinking",
        "Judging"
    ],
    "ENFP": [
        "Extroverted",
        "Intuition",
        "Feeling",
        "Perceiving"
    ],
    
    'ISTJ': [
        'Introverted',
        'Sensing',
        'Thinking',
        'Judging'
    ],
    'ISFP':[
        'Introverted',
        'Sensing',
        'Feeling',
        'Perceiving'
    ],
    'ISFJ': [
        'Introverted',
        'Sensing',
        'Feeling',
        'Judging'
    ],
    'ISTP':[
        'Introverted',
        'Sensing',
        'Thinking',
        'Perceiving'
    ],
    'ESTJ':[
        'Extroverted',
        'Sensing',
        'Thinking',
        'Judging'
    ],
    'ESFJ':[
        'Extroverted',
        'Sensing',
        'Feeling',
        'Judging'
    ],
    'ESTP':[
        'Extroverted',
        'Sensing',
        'Thinking',
        'Perceiving'
    ],
    'ESFP':[
        'Extroverted',
        'Sensing',
        'Feeling',
        'Perceiving'
    ],
}

traits = []
for MBTI, MBTI_traits in MBTI_type_traits.items():
    traits.extend(MBTI_traits)
    
traits = list(set(traits))

for t in traits:
    df_zodiac[t] = 0

for MBTI, traits in MBTI_type_traits.items():
    for trait in traits:
        df_zodiac.loc[df_zodiac["MBTI Type"] == MBTI, trait] = 1

In [10]:
# add months as dimensions to the datafram
df_zodiac = pd.concat([df_zodiac, pd.get_dummies(df_zodiac['Birth_Month'])], axis=1)
df_zodiac = pd.concat([df_zodiac, pd.get_dummies(df_zodiac['Sun_Sign'])], axis=1)
df_zodiac.head()


Unnamed: 0,Birthday,MBTI Type,ENFJ,ENFP,ENTJ,ENTP,ESFJ,ESFP,ESTJ,ESTP,...,Cancer,Capricorn,Gemini,Leo,Libra,Pisces,Sagittarius,Scorpio,Taurus,Virgo
0,2020-10-05,INFP,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,2007-01-01,ENTP,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,2006-10-03,INTP,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,2006-08-03,INTP,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,2006-04-16,ESFP,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# get list of columns in df_zodiac
df_zodiac.columns

Index(['Birthday', 'MBTI Type', 'ENFJ', 'ENFP', 'ENTJ', 'ENTP', 'ESFJ', 'ESFP',
       'ESTJ', 'ESTP', 'INFJ', 'INFP', 'INTJ', 'INTP', 'ISFJ', 'ISFP', 'ISTJ',
       'ISTP', 'Sun_Sign', 'Birth_Month', 'Introverted', 'Extroverted',
       'Intuition', 'Judging', 'Sensing', 'Feeling', 'Perceiving', 'Thinking',
       'April', 'August', 'December', 'February', 'January', 'July', 'June',
       'March', 'May', 'November', 'October', 'September', 'Aquarius', 'Aries',
       'Cancer', 'Capricorn', 'Gemini', 'Leo', 'Libra', 'Pisces',
       'Sagittarius', 'Scorpio', 'Taurus', 'Virgo'],
      dtype='object')

## Machine Learning Models

### Machine Learning to predict Sun Sign by MBTI Types

In [12]:
# Separate the data into labels and features -- sun sign as target and MBTI Types as dimensions

# Separate the y variable, the labels
y_zodiac_MBTI = df_zodiac['Sun_Sign']

# Separate the X variable, the features
X_zodiac_MBTI = df_zodiac.drop(columns=['Birthday', 'MBTI Type', 'Sun_Sign', 'Birth_Month', 'Introverted', 'Perceiving',
       'Intuition', 'Extroverted', 'Feeling', 'Judging', 'Sensing', 'Thinking',
       'April', 'August', 'December', 'February', 'January', 'July', 'June',
       'March', 'May', 'November', 'October', 'September'])

In [13]:
# Review the y variable Series
y_zodiac_MBTI[:5]

0        Libra
1    Capricorn
2        Libra
3          Leo
4        Aries
Name: Sun_Sign, dtype: object

In [14]:
# Review the X variable DataFrame
X_zodiac_MBTI[:5]

Unnamed: 0,ENFJ,ENFP,ENTJ,ENTP,ESFJ,ESFP,ESTJ,ESTP,INFJ,INFP,...,Cancer,Capricorn,Gemini,Leo,Libra,Pisces,Sagittarius,Scorpio,Taurus,Virgo
0,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
# Check the balance of our target values
balance_y_zodiac_MBTI = y_zodiac_MBTI.value_counts()
balance_y_zodiac_MBTI

Taurus         32
Cancer         31
Capricorn      28
Pisces         28
Virgo          27
Leo            26
Sagittarius    26
Aquarius       26
Aries          24
Scorpio        20
Libra          19
Gemini         19
Name: Sun_Sign, dtype: int64

In [16]:
# Split the data using train_test_split

X_zodiac_MBTI_train, X_zodiac_MBTI_test, y_zodiac_MBTI_train, y_zodiac_MBTI_test = train_test_split(
    X_zodiac_MBTI,
    y_zodiac_MBTI
)


In [17]:
# Instantiate the Logistic Regression model
logistic_regression_model = LogisticRegression()

# Fit the model using training data
zodiac_MBTI_model = logistic_regression_model.fit(X_zodiac_MBTI_train, y_zodiac_MBTI_train)

In [18]:
# Make a prediction using the testing data
predictions_zodiac_MBTI = zodiac_MBTI_model.predict(X_zodiac_MBTI_test)
predictions_zodiac_MBTI

array(['Cancer', 'Libra', 'Taurus', 'Capricorn', 'Capricorn', 'Capricorn',
       'Virgo', 'Cancer', 'Cancer', 'Taurus', 'Taurus', 'Capricorn',
       'Pisces', 'Virgo', 'Taurus', 'Pisces', 'Libra', 'Aries', 'Pisces',
       'Aquarius', 'Scorpio', 'Aries', 'Cancer', 'Capricorn', 'Scorpio',
       'Sagittarius', 'Aquarius', 'Sagittarius', 'Sagittarius', 'Cancer',
       'Libra', 'Sagittarius', 'Pisces', 'Leo', 'Capricorn', 'Libra',
       'Cancer', 'Cancer', 'Pisces', 'Sagittarius', 'Aquarius', 'Aries',
       'Virgo', 'Gemini', 'Aries', 'Cancer', 'Taurus', 'Capricorn',
       'Cancer', 'Cancer', 'Capricorn', 'Gemini', 'Cancer', 'Cancer',
       'Leo', 'Virgo', 'Scorpio', 'Taurus', 'Cancer', 'Virgo', 'Leo',
       'Capricorn', 'Capricorn', 'Aquarius', 'Taurus', 'Cancer', 'Aries',
       'Leo', 'Libra', 'Virgo', 'Aquarius', 'Sagittarius', 'Aries',
       'Gemini', 'Virgo', 'Aquarius', 'Taurus'], dtype=object)

In [19]:
# Print the balanced_accuracy score of the model
accuracy_zodiac_MBTI = balanced_accuracy_score(y_zodiac_MBTI_test, predictions_zodiac_MBTI)
accuracy_zodiac_MBTI

1.0

### Machine Learning to Predict Sun Sign by Expanded MBTI Traits

In [20]:
# Separate the data into labels and features -- sun sign as target and expanded MBTI Type traits as dimensions (i.e. Introverted, Extroverted, Intuition, etc.)

# Separate the y variable, the labels
y_zodiac_traits = df_zodiac['Sun_Sign']

# Separate the X variable, the features
X_zodiac_traits = df_zodiac[['Introverted', 'Perceiving',
       'Intuition', 'Extroverted', 'Feeling', 'Judging', 'Sensing', 'Thinking']]

In [21]:
y_zodiac_traits[:5]

0        Libra
1    Capricorn
2        Libra
3          Leo
4        Aries
Name: Sun_Sign, dtype: object

In [22]:
X_zodiac_traits[:5]

Unnamed: 0,Introverted,Perceiving,Intuition,Extroverted,Feeling,Judging,Sensing,Thinking
0,1,1,1,0,1,0,0,0
1,0,1,1,1,0,0,0,1
2,1,1,1,0,0,0,0,1
3,1,1,1,0,0,0,0,1
4,0,1,0,1,1,0,1,0


In [23]:
# Split the data using train_test_split

X_train_zodiac_traits, X_test_zodiac_traits, y_train_zodiac_traits, y_test_zodiac_traits = train_test_split(
    X_zodiac_traits,
    y_zodiac_traits
)


In [24]:
# Instantiate the Logistic Regression model
logistic_regression_model = LogisticRegression()

# Fit the model using training data
zodiac_trait_model = logistic_regression_model.fit(X_train_zodiac_traits, y_train_zodiac_traits)

In [25]:
# Make a prediction using the testing data
predictions_zodiac_traits = zodiac_trait_model.predict(X_test_zodiac_traits)
predictions_zodiac_traits

array(['Capricorn', 'Capricorn', 'Cancer', 'Capricorn', 'Cancer',
       'Cancer', 'Cancer', 'Taurus', 'Taurus', 'Taurus', 'Cancer',
       'Taurus', 'Taurus', 'Leo', 'Cancer', 'Taurus', 'Taurus', 'Cancer',
       'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Sagittarius', 'Taurus',
       'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus',
       'Taurus', 'Taurus', 'Capricorn', 'Cancer', 'Capricorn', 'Taurus',
       'Taurus', 'Sagittarius', 'Sagittarius', 'Scorpio', 'Capricorn',
       'Cancer', 'Cancer', 'Taurus', 'Capricorn', 'Capricorn', 'Taurus',
       'Capricorn', 'Taurus', 'Taurus', 'Taurus', 'Sagittarius', 'Taurus',
       'Taurus', 'Sagittarius', 'Cancer', 'Capricorn', 'Taurus', 'Cancer',
       'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus',
       'Taurus', 'Sagittarius', 'Capricorn', 'Capricorn', 'Capricorn',
       'Sagittarius', 'Cancer', 'Cancer', 'Taurus', 'Cancer', 'Taurus',
       'Taurus'], dtype=object)

In [26]:
# Print the balanced_accuracy score of the model
accuracy_zodiac_traits = balanced_accuracy_score(y_test_zodiac_traits, predictions_zodiac_traits)
accuracy_zodiac_traits

0.04692760942760943

### Machine Learning to Predict Birth Month by Expanded MBTI Type Traits

In [27]:
# Separate the data into labels and features -- birth month as target and expanded MBTI Type traits as dimensions (i.e. Introverted, Extroverted, Intuition, etc.)

# Separate the y variable, the labels
y_birth_month_traits = df_zodiac['Birth_Month']

# Separate the X variable, the features
X_birth_month_traits = df_zodiac[['Feeling', 'Judging', 'Introverted', 'Sensing', 'Thinking', 'Intuition',
       'Perceiving', 'Extroverted']]

In [28]:
# Split the data using train_test_split

X_birth_month_traits_train, X_birth_month_traits_test, y_birth_month_traits_train, y_birth_month_traits_test = train_test_split(
    X_birth_month_traits,
    y_birth_month_traits
)


In [29]:
# Instantiate the Logistic Regression model
logistic_regression_model = LogisticRegression()

# Fit the model using training data
birth_month_traits_model = logistic_regression_model.fit(X_birth_month_traits_train, y_birth_month_traits_train)

In [30]:
# Make a prediction using the testing data
predictions_birth_month_traits = birth_month_traits_model.predict(X_birth_month_traits_test)
predictions_birth_month_traits

array(['January', 'September', 'September', 'August', 'April',
       'September', 'April', 'November', 'February', 'May', 'August',
       'February', 'November', 'July', 'September', 'February',
       'February', 'February', 'February', 'August', 'September',
       'February', 'July', 'January', 'May', 'September', 'January',
       'August', 'September', 'July', 'April', 'July', 'September', 'May',
       'February', 'February', 'January', 'May', 'August', 'September',
       'February', 'August', 'September', 'September', 'February',
       'August', 'November', 'February', 'November', 'February', 'August',
       'August', 'May', 'July', 'February', 'May', 'February',
       'September', 'September', 'November', 'May', 'April', 'September',
       'May', 'May', 'April', 'November', 'February', 'February', 'May',
       'July', 'February', 'February', 'May', 'September', 'August',
       'September'], dtype=object)

In [31]:
# Print the balanced_accuracy score of the model
accuracy_birth_month_traits = balanced_accuracy_score(y_birth_month_traits_test, predictions_birth_month_traits)
accuracy_birth_month_traits

0.05787037037037037

### Machine Learning Birth Month and Introversion

In [32]:
# Separate the data into labels and features to predict Introversion by Birth Month

# Separate the y variable, the labels
y_introversion = df_zodiac['Introverted']

# Separate the X variable, the features
X_introversion = df_zodiac[['April', 'August', 'December', 'February',
       'January', 'July', 'June', 'March', 'May', 'November', 'October',
       'September']]

In [33]:
# Split the data using train_test_split

X_train_introversion, X_test_introversion, y_train_introversion, y_test_introversion = train_test_split(
    X_introversion,
    y_introversion
)


In [34]:
# Instantiate the Logistic Regression model
logistic_regression_model = LogisticRegression()

# Fit the model using training data
lr_model_introversion = logistic_regression_model.fit(X_train_introversion, y_train_introversion)

In [35]:
# Make a prediction using the testing data
predictions_introversion = lr_model_introversion.predict(X_test_introversion)
predictions_introversion

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [36]:
# Print the balanced_accuracy score of the model
accuracy_introversion = balanced_accuracy_score(y_test_introversion, predictions_introversion)
accuracy_introversion

0.5

### Machine Learning Extroversion and Birth Month

In [37]:
# Separate the data into labels and features to predict Extroversion by Birth Month

# Separate the y variable, the labels
y_extroversion = df_zodiac['Extroverted']

# Separate the X variable, the features
X_extroversion = df_zodiac[['April', 'August', 'December', 'February',
       'January', 'July', 'June', 'March', 'May', 'November', 'October',
       'September']]

In [38]:
# Split the data using train_test_split

X_train_extroversion, X_test_extroversion, y_train_extroversion, y_test_extroversion = train_test_split(
    X_extroversion,
    y_extroversion
)


In [39]:
# Instantiate the Logistic Regression model
logistic_regression_model = LogisticRegression()

# Fit the model using training data
lr_model_extroversion = logistic_regression_model.fit(X_train_extroversion, y_train_extroversion)

In [40]:
# Make a prediction using the testing data
predictions_extroversion = lr_model_extroversion.predict(X_test_extroversion)
predictions_extroversion

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [41]:
# Print the balanced_accuracy score of the model
accuracy_extroversion = balanced_accuracy_score(y_test_extroversion, predictions_extroversion)
accuracy_extroversion

0.5

### Machine Learning Extroversion by Sun Sign

In [42]:
# Separate the data into labels and features to predict Extroversion by sun sign

# Separate the y variable, the labels
y_extroversion_zodiac = df_zodiac['Extroverted']

# Separate the X variable, the features
X_extroversion_zodiac = df_zodiac[['Aquarius', 'Aries', 'Cancer', 'Capricorn',
       'Gemini', 'Leo', 'Libra', 'Pisces', 'Sagittarius', 'Scorpio', 'Taurus',
       'Virgo']]

In [43]:
# Split the data using train_test_split

X_train_extroversion_zodiac, X_test_extroversion_zodiac, y_train_extroversion_zodiac, y_test_extroversion_zodiac = train_test_split(
    X_extroversion_zodiac,
    y_extroversion_zodiac
)


In [44]:
# Instantiate the Logistic Regression model
logistic_regression_model = LogisticRegression()

# Fit the model using training data
lr_model_extroversion_zodiac = logistic_regression_model.fit(X_train_extroversion_zodiac, y_train_extroversion_zodiac)

In [45]:
# Make a prediction using the testing data
predictions_extroversion_zodiac = lr_model_extroversion_zodiac.predict(X_test_extroversion_zodiac)
predictions_extroversion_zodiac

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [46]:
# Print the balanced_accuracy score of the model
accuracy_extroversion_zodiac = balanced_accuracy_score(y_test_extroversion_zodiac, predictions_extroversion_zodiac)
accuracy_extroversion_zodiac

0.5

In [47]:
# Separate the data into labels and features to predict Extroversion by sun sign other method

# Separate the y variable, the labels
y_extroversion_zodiac_2 = df_zodiac['Sun_Sign']

# Separate the X variable, the features
X_extroversion_zodiac_2 = df_zodiac[['Extroverted']]

In [48]:
# Split the data using train_test_split

X_train_extroversion_zodiac_2, X_test_extroversion_zodiac_2, y_train_extroversion_zodiac_2, y_test_extroversion_zodiac_2 = train_test_split(
    X_extroversion_zodiac_2,
    y_extroversion_zodiac_2
)


In [49]:
# Instantiate the Logistic Regression model
logistic_regression_model = LogisticRegression()

# Fit the model using training data
lr_model_extroversion_zodiac_2 = logistic_regression_model.fit(X_train_extroversion_zodiac_2, y_train_extroversion_zodiac_2)

In [50]:
# Make a prediction using the testing data
predictions_extroversion_zodiac_2 = lr_model_extroversion_zodiac_2.predict(X_test_extroversion_zodiac_2)
predictions_extroversion_zodiac_2

array(['Taurus', 'Taurus', 'Capricorn', 'Taurus', 'Taurus', 'Taurus',
       'Taurus', 'Taurus', 'Taurus', 'Capricorn', 'Taurus', 'Taurus',
       'Capricorn', 'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus',
       'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus',
       'Capricorn', 'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus',
       'Capricorn', 'Capricorn', 'Taurus', 'Taurus', 'Taurus', 'Taurus',
       'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Capricorn', 'Capricorn',
       'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus',
       'Taurus', 'Taurus', 'Capricorn', 'Taurus', 'Capricorn', 'Taurus',
       'Capricorn', 'Capricorn', 'Taurus', 'Taurus', 'Taurus',
       'Capricorn', 'Taurus', 'Taurus', 'Taurus', 'Capricorn', 'Taurus',
       'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Taurus', 'Capricorn',
       'Taurus', 'Capricorn', 'Taurus', 'Taurus', 'Capricorn', 'Taurus'],
      dtype=object)

In [51]:
# Print the balanced_accuracy score of the model
accuracy_extroversion_zodiac_2 = balanced_accuracy_score(y_test_extroversion_zodiac_2, predictions_extroversion_zodiac_2)
accuracy_extroversion_zodiac_2

0.07916666666666666