In [1146]:
import re
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.ticker as mtick
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

plt.rcParams['figure.facecolor'] = 'white'
pd.options.display.max_colwidth = None

orig_df = pd.read_csv('responses.csv')
orig_df['Timestamp'] = pd.to_datetime(orig_df['Timestamp'])

def explodeColumn(df, column_name, prefix=None):
    if prefix is None:
        prefix = column_name
    return df[column_name].fillna('').str.split(',\s*').apply(lambda x: pd.Series(1,index=x)).fillna(0).drop("", axis=1, errors='ignore').add_prefix(prefix + ': ')

def getHighestStep(num):
    for i in [0, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150][::-1]:
        if i < num:
            return i
    return 0

def dropAndMerge(df1, df2, column_name):
    return df1.merge(df2, left_index=True, right_index=True).drop(column_name, axis=1)

def extractNum(string):
    if string is None:
        return None
    r = re.search(r'\d+', str(string))
    if r is None:
        return None
    return int(r.group())

# sanitize columns
column = "Are you a TG fan?"
df = explodeColumn(orig_df, column, 'TG Fan')
orig_df = dropAndMerge(orig_df, df, column)

column = "Do you consider yourself a part of any of the following fandoms?"
df = explodeColumn(orig_df, column, 'Fandom')
orig_df = dropAndMerge(orig_df, df, column)

column = "If you are not any of the above or would like to add more, what would you consider yourself a part of (related to TG)?"
orig_df = orig_df.drop(column, axis=1)

column = "If you are a FURRY, did you enter that fandom before or after you discovered an interest in TG? If you are not or if your interest in furry is unrelated to TG, please skip this question."
orig_df = orig_df.drop(column, axis=1)

column = "What gender do you define yourself as in real life? (Check all that apply)"
df = explodeColumn(orig_df, column, 'Gender IRL')
df = df[["Gender IRL: Male", "Gender IRL: Female", "Gender IRL: Transgender (MtF)", "Gender IRL: Transgender (FtM)", "Gender IRL: Non-binary", "Gender IRL: Genderfluid", "Gender IRL: Unsure", "Gender IRL: Prefer not to share"]]
orig_df = dropAndMerge(orig_df, df, column)

column = "What is your sexuality regarding real life attractions? (Spectrum / Binary)"
orig_df["Sexuality IRL"] = orig_df[column].apply(extractNum)
orig_df.drop(column, axis=1)

column = "What gender do you define yourself regarding TG scenarios and fantasies? (Check all that apply)"
df = explodeColumn(orig_df, column, 'Gender TG')
df = df[["Gender TG: Male", "Gender TG: Female", "Gender TG: Transgender (MtF)", "Gender TG: Transgender (FtM)", "Gender TG: Non-binary", "Gender TG: Genderfluid", "Gender TG: Unsure", "Gender TG: Prefer not to share"]]
orig_df = dropAndMerge(orig_df, df, column)

column = "What is your sexuality regarding TG scenarios and fantasies? (Spectrum / Binary)"
orig_df["Sexuality TG"] = orig_df[column].apply(extractNum)
orig_df.drop(column, axis=1)

column = "What media or artistic formats do you find an interest in or follow (in regards to TG)?"
df = explodeColumn(orig_df, column, 'Media')
orig_df = dropAndMerge(orig_df, df, column)

column = "If the media formats above do not contain one or more of what you were looking for, please add it in the space below."
orig_df = orig_df.drop(column, axis=1)

column = "What WEBSITES do you consider influential or contributing to your interest in TG?"
df = explodeColumn(orig_df, column, 'Website')
orig_df = dropAndMerge(orig_df, df, column)

column = "What TG TRIGGERS OR MECHANISMS do you enjoy?"
df = explodeColumn(orig_df, column, 'Trigger')
orig_df = dropAndMerge(orig_df, df, column)

column = "What SUPPLEMENTARY FEATURES of TG do you enjoy?"
df = explodeColumn(orig_df, column, 'Trigger Feature')
orig_df = dropAndMerge(orig_df, df, column)

column = "If your desired TRIGGER OR MECHANISM selection was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "If your desired SUPPLEMENTARY FEATURE was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "What EMOTIONS in TG do you enjoy viewing? (in relation to the TGee)"
df = explodeColumn(orig_df, column, 'Emotion')
orig_df = dropAndMerge(orig_df, df, column)

column = "If your desired EMOTION was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "What PRE-TG FEATURES do you enjoy viewing (in relation to the TGee)?"
df = explodeColumn(orig_df, column, 'Pre-TG')
orig_df = dropAndMerge(orig_df, df, column)

column = "What POST-TG FEATURES do you enjoy viewing (in relation to the TGee)?"
df = explodeColumn(orig_df, column, 'Post-TG')
orig_df = dropAndMerge(orig_df, df, column)

column = "If your desired PRE-TG FEATURE was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "If your desired POST-TG FEATURE was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "What SEX-RELATED FEATURES do you enjoy viewing (post TG, in relation to the TGee)?"
df = explodeColumn(orig_df, column, 'Sex')
orig_df = dropAndMerge(orig_df, df, column)

column = "If your desired SEX-RELATED FEATURE was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "What ENVIRONMENTS do you enjoy viewing (related to TG media)?"
df = explodeColumn(orig_df, column, 'Environment')
orig_df = dropAndMerge(orig_df, df, column)

column = "If your desired ENVIRONMENT was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "What TG PARTNERS do you enjoy viewing (in relation to TGee)?"
df = explodeColumn(orig_df, column, 'Partner')
orig_df = dropAndMerge(orig_df, df, column)

column = "If your desired PARTNER was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "What ROLES in TG do you enjoy imagining yourself in?"
df = explodeColumn(orig_df, column, 'Role')
df = df[[
 'Role: Mutual (you are TGee along with someone else)',
 'Role: Observer (you are watching someone TG)',
 'Role: Comforter (you help someone through their TG)',
 'Role: Main subject (you are TGee)',
 'Role: Controller (you are making someone TG)',
 'Role: Narrator (talking about a TG)',
 'Role: I do not see myself in any role.',
 "Role: Partner (you are the the TGee's partner)",
 'Role: Creator (the TG is your creation)',
 'Role: Creator (the TGee is your creation)',
]]
orig_df = dropAndMerge(orig_df, df, column)

column = "Where did you hear about this survey?"
df = explodeColumn(orig_df, column, 'From')
df["From: Patreon"] = df.apply(lambda x: 1.0 if x["From: Patreon"] or 'patreon' in orig_df["If your desired referral was not included, what would you add to this list? Fill in the blank below."].fillna('')[x.name].lower() else 0.0, axis=1)
orig_df = dropAndMerge(orig_df, df, column)

column = "If your desired referral was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "Do you have any FEEDBACK for this survey to improve, or questions you would like to see asked? Fill out the space below."
orig_df = orig_df.drop(column, axis=1)

column = "Do you consider yourself a content creator?"
orig_df = orig_df.drop(column, axis=1)

column = "What SCENARIOS OR THEMES do you enjoy viewing (related to TG media)?"
df = explodeColumn(orig_df, column, 'Scenario')
orig_df = dropAndMerge(orig_df, df, column)

column = "If your desired SCENARIO OR THEME was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)

column = "Email Address"
orig_df = orig_df.drop(column, axis=1)

column = "Feel free to type anything else you want into this box and I'll read it. -espeon"
orig_df = orig_df.drop(column, axis=1)

column = "What do you dislike about current TG media? Is there something that, if you encounter it, you will stop consuming the work? Is there something you exclusively seek out as a requirement?"
orig_df = orig_df.drop(column, axis=1)

column = "What do you like/dislike about the online TG community? What do you wish would change about the online TG community?"
orig_df = orig_df.drop(column, axis=1)

column = "If you had $1000 to spend on TG-related things, what would you spend it on?"
orig_df = orig_df.drop(column, axis=1)

column = "How extreme do you consider your sex preferences (in relation to TG)?"
orig_df = orig_df.drop(column, axis=1)

column = "If your desired ROLE was not included, what would you add to this list? Fill in the blank below."
orig_df = orig_df.drop(column, axis=1)


# column sanitization...
orig_df["Gender IRL: Male"] = orig_df.apply(lambda x: 1.0 if x["Gender IRL: Male"] or x["Gender IRL: Transgender (FtM)"] else 0.0, axis=1)
orig_df["Gender IRL: Female"] = orig_df.apply(lambda x: 1.0 if x["Gender IRL: Female"] or x["Gender IRL: Transgender (MtF)"] else 0.0, axis=1)
orig_df["Gender TG: Male"] = orig_df.apply(lambda x: 1.0 if x["Gender TG: Male"] or x["Gender TG: Transgender (FtM)"] else 0.0, axis=1)
orig_df["Gender TG: Female"] = orig_df.apply(lambda x: 1.0 if x["Gender TG: Female"] or x["Gender TG: Transgender (MtF)"] else 0.0, axis=1)
orig_df["Trigger: Body Part Size / Modification / Detachable / Attachable"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Body Part Size / Modification / Detachable / Attachable"] or x["Trigger: Body Part Size / Modification / Detachable"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Body Part Size / Modification / Detachable", axis=1)
orig_df["Trigger Feature: Hair Length Change"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Hair Length Change"] or x["Trigger Feature: Hair Change"] else 0.0, axis=1)
orig_df["Trigger Feature: Hair Color Change"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Hair Color Change"] or x["Trigger Feature: Hair Change"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Hair Change", axis=1)

orig_df["Trigger Feature: Involuntary"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Involuntary"] or x["Trigger: Involuntary"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Involuntary", axis=1)

orig_df["Trigger Feature: Temporary"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Temporary"] or x["Trigger: Temporary"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Temporary", axis=1)

orig_df["Trigger Feature: Mental Change"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Mental Change"] or x["Trigger Feature: Mental TG"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Mental TG", axis=1)

orig_df["Trigger: Metamorphosis / Cocoon"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Metamorphosis / Cocoon"] or x["Trigger Feature: Metamorphosis / Cocoon"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Metamorphosis / Cocoon", axis=1)

orig_df["Trigger: Sculpting"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Sculpting"] or x["Trigger Feature: Sculpting"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Sculpting", axis=1)

orig_df["Trigger: Skinsuit / Bodysuit"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Skinsuit"] or x["Trigger: Skinsuit / Bodysuit"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Skinsuit", axis=1)
    
orig_df["Trigger: Targeted / Bullying"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Targeted / Bullying"] or x["Trigger: Targeted / Bullying"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Targeted / Bullying", axis=1)
orig_df["Trigger: Coma"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Coma"] or x["Trigger Feature: Coma"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Coma", axis=1)
orig_df["Trigger: Global Event"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Global TG"] or x["Trigger: Global Event"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Global TG", axis=1)
orig_df["Trigger: Group TG"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Group TG"] or x["Trigger: Group TG"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Group TG", axis=1)
orig_df["Trigger: Rubber / Leather / Latex"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Rubber / Leather / Latex"] or x["Trigger Feature: Rubber / Leather / Latex"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Rubber / Leather / Latex", axis=1)
orig_df["Trigger Feature: Permanent / Irreversible"] = orig_df.apply(lambda x: 1.0 if x["Trigger: Permanent / Irreversible"] or x["Trigger Feature: Permanent / Irreversible"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Permanent / Irreversible", axis=1)

orig_df["Trigger Feature: Slow TG (~months/years)"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Slow TG (~years)"] or x["Trigger Feature: Slow TG (~months/years)"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Slow TG (~years)", axis=1)

orig_df["Pre-TG: Feminization (Before TG) / Sissification"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Feminization (Before TG)"] or x["Pre-TG: Feminization (Before TG) / Sissification"] or x["Trigger Feature: Feminization (Before TG) / Sissification"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Feminization (Before TG)", axis=1)
orig_df = orig_df.drop("Trigger Feature: Feminization (Before TG) / Sissification", axis=1)

orig_df["Post-TG: Feminization (After TG)"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Feminization (After TG)"] or x["Post-TG: Feminization (After TG)"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Feminization (After TG)", axis=1)

orig_df["Sex: Fertility"] = orig_df.apply(lambda x: 1.0 if x["Sex: Fertility"] or x["Post-TG: Fertility"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Fertility", axis=1)

orig_df["Sex: Buttplug / Tailplug"] = orig_df.apply(lambda x: 1.0 if x["Sex: Buttplug / Tailplug"] or x["Post-TG: Buttplug"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Buttplug", axis=1)

orig_df["Sex: Mind Break"] = orig_df.apply(lambda x: 1.0 if x["Sex: Mind Break"] or x["Post-TG: Mind Break"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Mind Break", axis=1)

orig_df["Sex: Heat / Estrus"] = orig_df.apply(lambda x: 1.0 if x["Sex: Heat / Estrus"] or x["Post-TG: Heat / Estrus"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Heat / Estrus", axis=1)

orig_df["Sex: Breeding"] = orig_df.apply(lambda x: 1.0 if x["Sex: Breeding"] or x["Post-TG: Breeding"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Breeding", axis=1)

orig_df["Sex: Public Use / Free Use"] = orig_df.apply(lambda x: 1.0 if x["Sex: Public Use / Free Use"] or x["Post-TG: Public Use / Free Use"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Public Use / Free Use", axis=1)

orig_df["Sex: Cunnilingus (Receiving)"] = orig_df.apply(lambda x: 1.0 if x["Sex: Cunnilingus (Receiving)"] or x["Sex: Cunnilingus"] else 0.0, axis=1)
orig_df = orig_df.drop("Sex: Cunnilingus", axis=1)
    
orig_df["Sex: Presenting / Bent over (TGee)"] = orig_df.apply(lambda x: 1.0 if x["Post-TG: Presenting"] or x["Sex: Presenting / Bent over (TGee)"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Presenting", axis=1)

orig_df["Sex: Milking (TGee)"] = orig_df.apply(lambda x: 1.0 if x["Sex: Milking (TGee)"] or x["Post-TG: Milking (TGee)"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Milking (TGee)", axis=1)

orig_df["Sex: Orgasms"] = orig_df.apply(lambda x: 1.0 if x["Sex: Orgasms"] or x["Post-TG: Orgasms"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Orgasms", axis=1)

orig_df["Sex: Lesbian / Yuri"] = orig_df.apply(lambda x: 1.0 if x["Sex: Lesbian / Yuri"] or x["Partner: Yuri / Lesbian"] else 0.0, axis=1)
orig_df = orig_df.drop("Partner: Yuri / Lesbian", axis=1)

orig_df["Sex: Rape / Noncon / Forced"] = orig_df.apply(lambda x: 1.0 if x["Post-TG: Rape"] or x["Sex: Rape / Noncon / Forced"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Rape", axis=1)

orig_df["Sex: Sterility"] = orig_df.apply(lambda x: 1.0 if x["Post-TG: Sterility"] or x["Sex: Sterility"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Sterility", axis=1)

orig_df["Sex: Dubcon / Coercion"] = orig_df.apply(lambda x: 1.0 if x["Sex: Dubcon / Coercion"] or x["Sex: Dubcon / Coercsion"] else 0.0, axis=1)
orig_df = orig_df.drop("Sex: Dubcon / Coercsion", axis=1)

orig_df["Sex: Exhibitionism (Forced) / Forced Exposure"] = orig_df.apply(lambda x: 1.0 if x["Sex: Exhibitionism (Forced) / Forced Exposure"] or x["Post-TG: Exhibitionism (Forced) / Forced Exposure"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Exhibitionism (Forced) / Forced Exposure", axis=1)
orig_df["Sex: Exhibitionism (Willing)"] = orig_df.apply(lambda x: 1.0 if x["Sex: Exhibitionism (Willing)"] or x["Post-TG: Exhibitionism (Willing)"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Exhibitionism (Willing)", axis=1)

orig_df["Sex: Tight Vagina"] = orig_df.apply(lambda x: 1.0 if x["Sex: Tight Vagina"] or x["Post-TG: Tight Vagina"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Tight Vagina", axis=1)

orig_df["Sex: Abuse (Physical) / Ryona"] = orig_df.apply(lambda x: 1.0 if x["Sex: Abuse (Physical) / Ryona"] or x["Sex: Abuse (Physical)"] or x["Sex: Abuse"] else 0.0, axis=1)
orig_df = orig_df.drop("Sex: Abuse", axis=1)
orig_df = orig_df.drop("Sex: Abuse (Physical)", axis=1)

orig_df["Post-TG: Schoolgirl Uniform"] = orig_df.apply(lambda x: 1.0 if x["Post-TG: Schoolgirl / JK"] or x["Post-TG: Schoolgirl Uniform"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Schoolgirl / JK", axis=1)

orig_df["Trigger Feature: Voice Change (Physical)"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Voice Change (Physical)"] or x["Post-TG: Voice Change (Physical)"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Voice Change (Physical)", axis=1)

orig_df["Trigger Feature: Verbal Communication Change"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Verbal Communication Change"] or x["Post-TG: Verbal Communication Change"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Verbal Communication Change", axis=1)

orig_df["Trigger Feature: Intelligence Change"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Intelligence Change"] or x["Post-TG: Intelligence Change"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Intelligence Change", axis=1)

orig_df["Trigger Feature: Transgender"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Transgender"] or x["Trigger: Transgender"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Transgender", axis=1)

orig_df["Trigger Feature: Accidental"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Accidental"] or x["Trigger: Accidental"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger: Accidental", axis=1)

orig_df["Trigger: Parallel Dimension"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Pocket Dimension"] or x["Trigger: Parallel Dimension"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Pocket Dimension", axis=1)

orig_df["Trigger: Technological / Bionic / Android"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Technological / Bionic / Android"] or x["Trigger: Technological / Bionic / Android"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Technological / Bionic / Android", axis=1)

orig_df["Trigger Feature: Trade / Deal"] = orig_df.apply(lambda x: 1.0 if x["Trigger Feature: Trade / Deal"] or x["Trigger Feature: Trade"] else 0.0, axis=1)
orig_df = orig_df.drop("Trigger Feature: Trade", axis=1)

orig_df["Post-TG: Guro / Gore"] = orig_df.apply(lambda x: 1.0 if x["Post-TG: Guro / Gore"] or x["Post-TG: Guro"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Guro", axis=1)
orig_df["Post-TG: Breastfeeding (Baby)"] = orig_df.apply(lambda x: 1.0 if x["Post-TG: Breastfeeding (Baby)"] or x["Post-TG: Breastfeeding"] else 0.0, axis=1)
orig_df = orig_df.drop("Post-TG: Breastfeeding", axis=1)
orig_df["Sex: Watersports / Urination"] = orig_df.apply(lambda x: 1.0 if x["Sex: Watersports / Urination"] or x["Sex: Watersports"] else 0.0, axis=1)
orig_df = orig_df.drop("Sex: Watersports", axis=1)
orig_df["Role: Creator (the TG is your creation)"] = orig_df.apply(lambda x: 1.0 if x["Role: Creator (the TG is your creation)"] or x["Role: Creator (the TGee is your creation)"] else 0.0, axis=1)
orig_df = orig_df.drop("Role: Creator (the TGee is your creation)", axis=1)
orig_df["Scenario: Part of experiment"] = orig_df.apply(lambda x: 1.0 if x["Environment: Experiment"] or x["Scenario: Part of experiment"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Experiment", axis=1)
orig_df["Environment: Hot Springs / Onsen"] = orig_df.apply(lambda x: 1.0 if x["Environment: Onsen"] or x["Environment: Hot Springs / Onsen"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Onsen", axis=1)
orig_df["Scenario: Summoning (of Other) / Demon Summoning"] = orig_df.apply(lambda x: 1.0 if x["Scenario: Summoning (of Other) / Demon Summoning"] or x["Environment: Summoning"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Summoning", axis=1)

orig_df["Scenario: Date"] = orig_df.apply(lambda x: 1.0 if x["Scenario: Date"] or x["Environment: Date"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Date", axis=1)

orig_df["Scenario: Kidnapping"] = orig_df.apply(lambda x: 1.0 if x["Scenario: Kidnapping"] or x["Environment: Kidnapping"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Kidnapping", axis=1)

orig_df["Scenario: Alien Abduction"] = orig_df.apply(lambda x: 1.0 if x["Scenario: Alien Abduction"] or x["Environment: Alien Abduction"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Alien Abduction", axis=1)

orig_df["Scenario: Fighting against bodily pleasures"] = orig_df.apply(lambda x: 1.0 if x["Scenario: Fighting against bodily pleasures"] or x["Environment: Fighting against bodily pleasures"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Fighting against bodily pleasures", axis=1)

orig_df["Scenario: Giving into bodily pleasures"] = orig_df.apply(lambda x: 1.0 if x["Scenario: Giving into bodily pleasures"] or x["Environment: Giving into bodily pleasures"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Giving into bodily pleasures", axis=1)

orig_df["Environment: Home (TGee)"] = orig_df.apply(lambda x: 1.0 if x["Environment: Home"] or x["Environment: Home (TGee)"] else 0.0, axis=1)
orig_df = orig_df.drop("Environment: Home", axis=1)

orig_df["Partner: Prostitution (Customer)"] = orig_df.apply(lambda x: 1.0 if x["Partner: Prostitution (Paid)"] or x["Partner: Prostitution (Customer)"] else 0.0, axis=1)
orig_df = orig_df.drop("Partner: Prostitution (Paid)", axis=1)

orig_df["Partner: Trainer (Erotic)"] = orig_df.apply(lambda x: 1.0 if x["Partner: Trainer (Erotic)"] or x["Partner: Trainer"] else 0.0, axis=1)
orig_df = orig_df.drop("Partner: Trainer", axis=1)

orig_df["Pre-TG: Previously familiar with TG"] = orig_df.apply(lambda x: 1.0 if x["Pre-TG: Familiar with TG"] or x["Pre-TG: Previously familiar with TG"] else 0.0, axis=1)
orig_df = orig_df.drop("Pre-TG: Familiar with TG", axis=1)

orig_df = orig_df.drop("Fandom: Anime Fan", axis=1)
orig_df = orig_df.drop("Environment: Bathroom", axis=1)
orig_df = orig_df.drop("Post-TG: Filming / Pictures", axis=1)
orig_df = orig_df.drop("Sex: Drugs", axis=1)
orig_df = orig_df.drop("Post-TG: Exhibitionism", axis=1)
orig_df = orig_df.drop("Environment: Demons", axis=1)
orig_df = orig_df.drop("Environment: House", axis=1)
orig_df = orig_df.drop("Trigger Feature: Size Change", axis=1)
orig_df = orig_df.drop("Partner: Grandfather", axis=1)
orig_df = orig_df.drop("Partner: Grandmother", axis=1)
orig_df = orig_df.drop("Sex: Cheating / Netorare", axis=1)
'done'


'done'

In [1171]:
columns = [column for column in orig_df.columns if ':' in column]
traits_df = orig_df[columns] #.groupby(by=["Scenario: Kidnapping"]).apply(lambda x: x)
sanitized_df = orig_df.apply(lambda x: x)

first_trues = traits_df.apply(lambda x: getHighestStep(x.idxmax()))
last_trues = traits_df.iloc[::-1].apply(lambda x: x.idxmax())

for column in ["Trigger: Incubus", "Trigger: Demon", "Pre-TG: Incubus", "Post-TG: Blonde", "Post-TG: Brunette", "Post-TG: Redhead"]:
    first_trues[column] = 155
for column in ["Website: 8muses", "Website: NexxusHost.com", "Website: Overflowing Bra", "Website: Nifty Erotic Stories Archive", "Trigger: Gadget", "Trigger: Sci-Fi", "Trigger: Splitting", "Trigger: Multiple Changes", "Post-TG: Haircut", "Scenario: Getting a haircut", "Scenario: Hiding changes", "Post-TG: Lolita", "Post-TG: Petticoats", "Post-TG: Secretary", "Post-TG: Rubber / Leather / Latex", "Post-TG: Gender Identity Exploration / Readjustment", "Post-TG: Shaving", "Partner: Another TGee (Opposite TG)"]:
    first_trues[column] = 156
for column in ["Sex: Females Only", "Sex: Body Exploration (Sexual)", "Partner: Husband", "Partner: Wife", "Partner: Mugger / Robber / Thief", "Scenario: Bad End", "Scenario: Yuri", "Trigger Feature: Punishment", "Trigger Feature: Obligation", "Environment: Bedroom", "Post-TG: Trying on Clothes", "Role: Partner (you are the the TGee's partner)", "Sex: Watersports / Urination"]:
    first_trues[column] = 157
for column in ["Role: Creator (the TG is your creation)", "Scenario: Awkward interactions with friends"]:
    first_trues[column] = 158

for column in first_trues.index:
    sanitized_df[column].values[:first_trues[column]] = None
    
#     if the last true was a while ago then erase everything up until the last true
#     if len(sanitized_df) - last_trues[column] > 100:
#         sanitized_df[column].values[last_trues[column]+1:] = None

columns = [column for column in sanitized_df.columns if ':' in column]
traits_df = sanitized_df[columns]

decayed = [column for column in columns if traits_df.isna()[column][411]]

columns = [a for a in sanitized_df.columns if ':' in a]
traits_df = sanitized_df[columns] #.groupby(by=["Scenario: Kidnapping"]).apply(lambda x: x)

traits_meta_df = traits_df.apply(lambda x: x.value_counts()).transpose()
traits_meta_df["Total"] = traits_meta_df[1.0] + traits_meta_df[0.0]
traits_meta_df["Percent"] = traits_meta_df[1.0] / traits_meta_df["Total"]
traits_meta_df["FirstTrue"] = first_trues
traits_meta_df["LastTrue"] = last_trues
corrs = traits_df.corr().unstack()
traits_meta_df["Corrs"] = traits_df.columns.map(lambda x: corrs[x].drop(x).sort_values(ascending=False))
traits_meta_df = traits_meta_df.sort_values(by="Percent", ascending=False)

traits_df.to_csv("anonymized_records.csv")
traits_df.to_json("anonymized_records.json", orient="records")


In [1169]:
pd.DataFrame(traits_df.columns, columns=["title"]).to_json("columns.json", orient="records")


In [962]:
import time, sys
from IPython.display import clear_output

def update_progress(index, total=1):
    progress = index / total
    bar_length = 20
    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress = 0
    if progress < 0:
        progress = 0
    if progress >= 1:
        progress = 1

    block = int(round(bar_length * progress))

    clear_output(wait = True)
    text = "Progress: [{0}] {1:.1f}% {2}/{3}".format( "#" * block + "-" * (bar_length - block), progress * 100, index, total)
    print(text)

corrs = pd.DataFrame(traits_df.corr().unstack())
corrs = corrs.rename(columns={0: "Corr"})

number_of_elements = 1000

num_columns = len(traits_df.columns)

corrs["Percent"] = None
i = 0
update_progress(0, num_columns)

for column in traits_df:
    traits_meta_df = traits_df[traits_df[column] == 1.0].apply(lambda x: x.value_counts()).transpose()
    traits_meta_df["Total"] = traits_meta_df[1.0] + traits_meta_df[0.0]
    traits_meta_df["Percent"] = traits_meta_df[1.0] / traits_meta_df["Total"]

    for second in traits_meta_df.index:
        corrs.at[(column, second), "Percent"] = traits_meta_df.loc[second]["Percent"]
    update_progress(i, num_columns)
    i += 1

update_progress(1)
corrs_orig = corrs.copy()


Progress: [####################] 100.0% 1/1


In [963]:
i = 0
update_progress(0, num_columns)

for column in traits_df:
    corrs.loc[column] = corrs.loc[column].sort_values("Corr", ascending=False)
    update_progress(i, num_columns)
    i += 1

update_progress(1)


Progress: [####################] 100.0% 1/1


In [981]:
corrs = corrs_orig.copy()
corrs.loc[column] = corrs.loc[column].sort_values("Corr", ascending=False)

corrs


Unnamed: 0,Unnamed: 1,Corr,Percent
TG Fan: Yes,TG Fan: Yes,1.000000,
TG Fan: Yes,TG Fan: Unsure,-0.793623,
TG Fan: Yes,Fandom: Anime / Weeb,0.050815,0.842893
TG Fan: Yes,Fandom: Furry,0.015444,0.122195
TG Fan: Yes,Fandom: Games,0.130778,0.759162
...,...,...,...
Scenario: Hiding changes,Scenario: Yuri,,
Scenario: Hiding changes,Scenario: Awkward interactions with friends,,
Scenario: Hiding changes,Scenario: Bad End,,
Scenario: Hiding changes,Scenario: Getting a haircut,,


In [1136]:
corrs = traits_df.corr()

df = pd.DataFrame(traits_df.columns)
column = "TG Fan: Yes"
# corrs = corrs[column].sort_values(ascending=False).reset_index().rename(columns={'index': 'Trait', column: 'Corr'}).drop(0)
# corrs["Percent"] = traits_meta_df.loc[column]["Percent"]

corrs_for_column = corrs[column].reset_index()
corrs_for_column["Percent"] = traits_meta_df["Percent"].values

# for column in traits_df.columns:
#     df = 

# corrs.reset_index()