In [13]:
import coins

# Loading and initial preprocessing of raw data; creation of customized DataFrames

In [6]:
# LOAD RAW DATA
# Each preprocessed raw table is fetched by its dataset key; the keys are
# handed to coins.io.getPreprocessedRawData in the same order as the
# variables on the left-hand side.
# NOTE(review): `sessions` is loaded but not referenced by any other cell
# visible in this notebook — confirm whether it is still required.
rawDataKeys = ('ipip', 'mpzm', 'mood', 'images', 'sessions', 'imageLabels')
ipip, mpzm, mood, images, sessions, imageLabels = (
    coins.io.getPreprocessedRawData(key) for key in rawDataKeys
)

In [7]:
# BUILD DEFINED DATAFRAMES
# Every frame below is produced by a coins.dfcreation builder from the raw
# tables loaded in the previous cell.
builders = coins.dfcreation
dfPersonality = builders.createPersonality(ipip)
dfImageDescriptions = builders.createImageDescriptions(images)
dfImageRatings = builders.createImageRatings(images)
# NOTE(review): `ipip` is passed as both the 1st and 3rd argument while the
# loaded `sessions` table is never used — verify against the signature of
# createSocioDemographics before relying on this frame.
dfSocioDemographics = builders.createSocioDemographics(ipip, mpzm, ipip, mood)
dfImageContents = builders.createImageContents(images, imageLabels)

In [8]:
# SAVE DEFINED DATAFRAMES
# Persist each freshly built frame under the name that the later
# coins.io.loadInitialDFs calls use to read it back.
initialFrames = (
    (dfPersonality, 'personality'),
    (dfImageDescriptions, 'imageDescriptions'),
    (dfImageRatings, 'imageRatings'),
    (dfSocioDemographics, 'socioDemographics'),
    (dfImageContents, 'imageContents'),
)
for frame, frameName in initialFrames:
    coins.io.saveInitialDFs(frame, frameName)

# Special Preparation of dfImageDescriptions

In [6]:
# LOAD DATAFRAME
# Reload the initial 'imageDescriptions' frame from storage so this section
# can be run without re-executing the raw-data cells above.
dfImageDescriptions = coins.io.loadInitialDFs('imageDescriptions')

In [None]:
# OPTION 1: Analyze and save imageDescriptions (needs credentials, costs money)
# Run EITHER this cell OR the "OPTION 2" cell, not both: Option 2 simply
# reloads the result that this cell saves.
# The frame is passed through three steps in sequence (translate, analyze
# sentiment/emotions, fill) and the final result is persisted.
# `translateToEnglish` must be qualified: the notebook only does `import coins`,
# so the bare name raises NameError on a fresh kernel.
# NOTE(review): assumed to live in coins.nluTranslation like the two sibling
# calls below — confirm the module.
dfImageDescriptions = coins.nluTranslation.translateToEnglish(dfImageDescriptions)
dfImageDescriptions = coins.nluTranslation.analyzeEnglishSentimentAndEmotions(dfImageDescriptions)
dfImageDescriptions = coins.nluTranslation.fillImageDescriptions(dfImageDescriptions)
coins.io.saveAnalyzedImageDescriptions(dfImageDescriptions)

In [9]:
# OPTION 2: Load existing analyzed imageDescriptions
# Alternative to OPTION 1: replaces dfImageDescriptions with the previously
# saved analysis result instead of recomputing it.
dfImageDescriptions = coins.io.loadAnalyzedImageDescriptions()

# Check for significant Correlations between all 5 DataFrames

In [3]:
# LOAD DATAFRAMES
# Four frames come from the initial store; the image descriptions are taken
# from the separately saved analyzed version instead.
dfPersonality, dfImageRatings, dfSocioDemographics, dfImageContents = (
    coins.io.loadInitialDFs(frameName)
    for frameName in ('personality', 'imageRatings', 'socioDemographics', 'imageContents')
)
dfImageDescriptions = coins.io.loadAnalyzedImageDescriptions()

In [4]:
# PREPARE DATAFRAME VALUES
# Each frame is replaced by its prepared counterpart. Because the inputs are
# overwritten, re-running this cell without re-running the load cell feeds
# already-prepared frames back into the prepare functions.
dfPersonality = coins.correlation.preparePersonality(
    dfPersonality,
    multiclass=False,
    split='mean',
)
dfImageDescriptions = coins.correlation.prepareImageDescriptions(
    dfImageDescriptions,
    multiclass=False,
    split='median',
)
# NOTE(review): `dropList` is not used by any other cell visible in this
# notebook, and the meaning of the positional `1` is not evident from here —
# confirm against prepareSocioDemographics.
dfSocioDemographics, dropList = coins.correlation.prepareSocioDemographics(
    dfSocioDemographics,
    1,
)

In [6]:
# CALCULATE CORRELATIONS AND P-VALUES
def _pValuesAndCorrelations(dfLeft, dfRight):
    """Run calculateCorrWithPValue on a pair of frames and keep its last two results.

    The call returns five values; only the fourth and fifth are used by this
    notebook (bound to the `p...` and `c...` names respectively). The first
    three are discarded inside this function so that the notebook-level `_`
    is not rebound — assigning `_` at top level stops IPython from updating
    its last-output variable.

    Returns:
        tuple: (fourth result, fifth result) of calculateCorrWithPValue.
    """
    _, _, _, pValues, correlations = coins.correlation.calculateCorrWithPValue(dfLeft, dfRight)
    return pValues, correlations


# One call per frame pair; the order of the calls is unchanged.
pPersonalitySocioDemographics, cPersonalitySocioDemographics = _pValuesAndCorrelations(dfPersonality, dfSocioDemographics)
pPersonalityImageDescriptions, cPersonalityImageDescriptions = _pValuesAndCorrelations(dfPersonality, dfImageDescriptions)
pPersonalityImageRatings, cPersonalityImageRatings = _pValuesAndCorrelations(dfPersonality, dfImageRatings)
pSocioDemographicsImageDescriptions, cSocioDemographicsImageDescriptions = _pValuesAndCorrelations(dfSocioDemographics, dfImageDescriptions)
pSocioDemographicsImageRatings, cSocioDemographicsImageRatings = _pValuesAndCorrelations(dfSocioDemographics, dfImageRatings)
pImageDescriptionsImageRatings, cImageDescriptionsImageRatings = _pValuesAndCorrelations(dfImageDescriptions, dfImageRatings)
pImageContentsSocioDemographics, cImageContentsSocioDemographics = _pValuesAndCorrelations(dfImageContents, dfSocioDemographics)
pImageContentsImageDescriptions, cImageContentsImageDescriptions = _pValuesAndCorrelations(dfImageContents, dfImageDescriptions)
pImageContentsImageRatings, cImageContentsImageRatings = _pValuesAndCorrelations(dfImageContents, dfImageRatings)
pImageContentsPersonality, cImageContentsPersonality = _pValuesAndCorrelations(dfImageContents, dfPersonality)

In [7]:
# EXTRACT SIGNIFICANT CORRELATIONS
# Every (p, c) pair computed in the previous cell is passed to
# extractSignificantCorrelations; the result is stored under the matching
# `s...` name.
extractSignificant = coins.correlation.extractSignificantCorrelations

# Pairs involving the personality frame
sPersonalitySocioDemographics = extractSignificant(
    pPersonalitySocioDemographics, cPersonalitySocioDemographics)
sPersonalityImageDescriptions = extractSignificant(
    pPersonalityImageDescriptions, cPersonalityImageDescriptions)
sPersonalityImageRatings = extractSignificant(
    pPersonalityImageRatings, cPersonalityImageRatings)

# Pairs among socio-demographics, image descriptions and image ratings
sSocioDemographicsImageDescriptions = extractSignificant(
    pSocioDemographicsImageDescriptions, cSocioDemographicsImageDescriptions)
sSocioDemographicsImageRatings = extractSignificant(
    pSocioDemographicsImageRatings, cSocioDemographicsImageRatings)
sImageDescriptionsImageRatings = extractSignificant(
    pImageDescriptionsImageRatings, cImageDescriptionsImageRatings)

# Pairs involving the image contents frame
sImageContentsSocioDemographics = extractSignificant(
    pImageContentsSocioDemographics, cImageContentsSocioDemographics)
sImageContentsImageDescriptions = extractSignificant(
    pImageContentsImageDescriptions, cImageContentsImageDescriptions)
sImageContentsImageRatings = extractSignificant(
    pImageContentsImageRatings, cImageContentsImageRatings)
sImageContentsPersonality = extractSignificant(
    pImageContentsPersonality, cImageContentsPersonality)

In [8]:
# SAVE SIGNIFICANT CORRELATIONS
# Each result is written under a '<frameA>_<frameB>' key.
# NOTE(review): the key for sSocioDemographicsImageRatings is
# 'imageRatings_socioDemographics', i.e. reversed relative to the variable
# name and to the other keys. It is kept as-is because other code may read
# it under this name — confirm before renaming.
significantResults = (
    (sPersonalitySocioDemographics, 'personality_socioDemographics'),
    (sPersonalityImageDescriptions, 'personality_imageDescriptions'),
    (sPersonalityImageRatings, 'personality_imageRatings'),
    (sSocioDemographicsImageDescriptions, 'socioDemographics_imageDescriptions'),
    (sSocioDemographicsImageRatings, 'imageRatings_socioDemographics'),
    (sImageDescriptionsImageRatings, 'imageDescriptions_imageRatings'),
    (sImageContentsSocioDemographics, 'imageContents_socioDemographics'),
    (sImageContentsImageDescriptions, 'imageContents_imageDescriptions'),
    (sImageContentsImageRatings, 'imageContents_imageRatings'),
    (sImageContentsPersonality, 'imageContents_personality'),
)
for significant, pairName in significantResults:
    coins.io.saveSignificantCorrelations(significant, pairName)

In [9]:
# SAVE PREPARED DATAFRAMES
# Store the five frames in their current state so the prediction section can
# reload them via coins.io.loadPreparedDFs.
preparedFrames = (
    (dfPersonality, 'personality'),
    (dfImageDescriptions, 'imageDescriptions'),
    (dfSocioDemographics, 'socioDemographics'),
    (dfImageRatings, 'imageRatings'),
    (dfImageContents, 'imageContents'),
)
for frame, frameName in preparedFrames:
    coins.io.savePreparedDFs(frame, frameName)

# Find best Predictions

In [3]:
# LOAD PREPARED DATAFRAMES
# Read back the frames written by coins.io.savePreparedDFs, one per name,
# in the same order as the variables on the left-hand side.
preparedNames = ('personality', 'imageRatings', 'socioDemographics', 'imageContents', 'imageDescriptions')
dfPersonality, dfImageRatings, dfSocioDemographics, dfImageContents, dfImageDescriptions = (
    coins.io.loadPreparedDFs(frameName) for frameName in preparedNames
)

In [7]:
# FIND BEST CLASSIFIER
# Input frames: socio-demographics, image contents and image ratings.
# Target frame: personality. dfImageDescriptions is not among the inputs.
inputFrames = [dfSocioDemographics, dfImageContents, dfImageRatings]
result = coins.classification.findBestClassifier(
    inputFrames,
    dfPersonality,
    "dfPersonality",
    inputFeatureCombination=False,
    printProgress=True,
)
# Bare last expression so the notebook renders the result table.
result

completed: neurotizismusCategory
completed: extraversionCategory
completed: offenheitCategory
completed: vertraeglichkeitCategory
completed: gewissenhaftigkeitCategory


Unnamed: 0,TargetFeature,InputFeature,BestAlgorithm,R^2,Accuracy
0,neurotizismusCategory,job_status_edu_parttime| educational_achieveme...,"KNN Classifier, Degree: 1",0.25,0.814815
1,extraversionCategory,job_status_edu_parttime| work_district_LU| job...,"KNN Classifier, Degree: 2",0.22619,0.807692
2,offenheitCategory,company_size_250.0| educational_achievement_Be...,Logistic Regression,0.041667,0.782609
3,vertraeglichkeitCategory,job_sector_Gastgewerbe/Beherbergung und Gastro...,"SVC (polynomial), Degree: 1",-0.05,0.740741
4,gewissenhaftigkeitCategory,job_status_edu_parttime| registration_ageKat_5...,SVC (linear),-0.335165,0.666667
