# Build an association rule mining (ARM) model on Reddit and external data

In [1]:
# Display the SparkSession handle. `spark` is not created anywhere in this
# notebook, so it is presumably injected by the hosting Spark runtime.
spark

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 19, 6, Finished, Available)

In [2]:
# Location of the workspace's default blob container.
workspace_default_storage_account = "group10astorage46582e02e"
workspace_default_container = "azureml-blobstore-e8a18b52-3288-4d1f-9f32-d5a9249c2c0e"
workspace_wasbs_base_url = f"wasbs://{workspace_default_container}@{workspace_default_storage_account}.blob.core.windows.net/"

# Load the MBTI comments and inspect their schema.
# NOTE(review): the base URL already ends with "/", so the joined path contains
# "//"; it is kept as-is to preserve the path this notebook has been using.
mbti_comments_path = f"{workspace_wasbs_base_url}/mbti_comments.parquet"
comment_load = spark.read.parquet(mbti_comments_path)
comment_load.printSchema()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 19, 7, Finished, Available)

root
 |-- sub_id: string (nullable = true)
 |-- comment_author: string (nullable = true)
 |-- comment_text: string (nullable = true)
 |-- link_id: string (nullable = true)
 |-- comment_score: long (nullable = true)
 |-- comment_controversiality: long (nullable = true)
 |-- reply_to: string (nullable = true)
 |-- year: integer (nullable = true)
 |-- month: integer (nullable = true)



In [3]:
# Cache the comments DataFrame, since several later cells reuse it.
comment_load.cache()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 18, 8, Finished, Available)

DataFrame[sub_id: string, comment_author: string, comment_text: string, link_id: string, comment_score: bigint, comment_controversiality: bigint, reply_to: string, year: int, month: int]

## Data preprocessing

In [8]:
from pyspark.sql.functions import udf, when,col
from pyspark.sql.types import StringType
import re

# The 16 MBTI personality types, in uppercase.
personality_types = ["ESTJ", "ISTJ", "INFP", "ENFP", "INTJ", "ENTJ", "INTP", "ENTP",
                    "ESFJ", "ISFJ", "ENFJ", "INFJ", "ESFP", "ISFP", "ISTP", "ESTP"]

# Whole-word pattern matching any type; the inline (?i) flag makes it case-insensitive.
pattern = "(?i)\\b(" + "|".join(personality_types) + ")\\b"

# Compile once so the pattern is not looked up again for every row.
_mbti_regex = re.compile(pattern)


def extract_all_types(title):
    """Return every MBTI type mentioned in ``title`` as one string.

    Args:
        title: Text to scan. May be ``None`` or empty, because the
            ``comment_text`` column this is applied to is nullable.

    Returns:
        The matched types in uppercase, in order of appearance and joined by
        ``", "`` (duplicates are kept). Returns ``""`` when nothing matches or
        when ``title`` is ``None``/empty.
    """
    # A null comment would make the regex call raise TypeError and fail the UDF.
    if not title:
        return ""
    return ', '.join(match.upper() for match in _mbti_regex.findall(title))

# Wrap the extractor as a Spark UDF that returns a string.
extract_all_types_udf = udf(extract_all_types, StringType())

# Label every comment with the MBTI types it mentions:
#   1. put the raw matches in a temporary column,
#   2. copy them into 'mbti_type_related', using 'general' when nothing matched,
#   3. drop the temporary column.
_matched_types = col("mbti_type_related_temp")
comment_load = (
    comment_load
    .withColumn("mbti_type_related_temp", extract_all_types_udf(col("comment_text")))
    .withColumn(
        "mbti_type_related",
        when(_matched_types == "", "general").otherwise(_matched_types),
    )
    .drop("mbti_type_related_temp")
)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 18, 13, Finished, Available)

In [7]:
# Preview the comments, including the mbti_type_related column.
comment_load.show()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 18, 12, Finished, Available)

+------+--------------------+--------------------+---------+-------------+------------------------+--------+----+-----+-----------------+
|sub_id|      comment_author|        comment_text|  link_id|comment_score|comment_controversiality|reply_to|year|month|mbti_type_related|
+------+--------------------+--------------------+---------+-------------+------------------------+--------+----+-----+-----------------+
|rfzn00|     Master-Elk-5465|yes it feels like...|t3_rfzn00|            7|                       0|      t3|2021|   12|          general|
|rfuyza|PragmaticGuardian613|   Hahaha! What?????|t3_rfuyza|            1|                       0|      t1|2021|   12|          general|
|rezbts|           [deleted]|           [deleted]|t3_rezbts|            2|                       0|      t1|2021|   12|          general|
|rfmj3f|        GiveretLivni|I'd photo my frie...|t3_rfmj3f|            1|                       0|      t3|2021|   12|          general|
|rfu8rx|        samiaboubaya|I thi

In [9]:
# Export the first five labelled comments as a small sample table for the website.
comment_head_pd = comment_load.limit(5).toPandas()
comment_head_pd.to_csv(
    "Users/ml2078/fall-2023-reddit-project-team-10/data/csv/comment_with_type_head.csv",
    index=False,
)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 18, 14, Finished, Available)

In [13]:
# Keep only the comment text and its MBTI label.
arm_columns = ["comment_text", "mbti_type_related"]
arm_comment = comment_load.select(arm_columns)
arm_comment.show()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 18, 18, Finished, Available)

+--------------------+-----------------+
|        comment_text|mbti_type_related|
+--------------------+-----------------+
|yes it feels like...|          general|
|   Hahaha! What?????|          general|
|           [deleted]|          general|
|I'd photo my frie...|          general|
|I think you may b...|       ENFP, ENFP|
|&gt;they're reall...|          general|
|Interesting why d...|          general|
|I did some creepy...|          general|
|I see Shrek using...|          general|
|                isfp|             ISFP|
|i convinced my co...|          general|
|I'm not fragile. ...|          general|
|&gt;Kid\n\nFar fr...|          general|
|Haha, lol, I used...|          general|
|   Me, but I’m a guy|          general|
|       ENTJ, or ESTJ|       ENTJ, ESTJ|
|"Etherium" is not...|          general|
|         Full of cum|          general|
|ENFP. (Sing in Bi...|             ENFP|
|           [deleted]|          general|
+--------------------+-----------------+
only showing top

In [14]:
# Drop the comments labelled "general", i.e. those that mention no MBTI type.
mentions_a_type = arm_comment["mbti_type_related"] != "general"
arm_comment = arm_comment.filter(mentions_a_type)
arm_comment.show()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 18, 19, Finished, Available)

+--------------------+-----------------+
|        comment_text|mbti_type_related|
+--------------------+-----------------+
|I think you may b...|       ENFP, ENFP|
|                isfp|             ISFP|
|       ENTJ, or ESTJ|       ENTJ, ESTJ|
|ENFP. (Sing in Bi...|             ENFP|
|                Intj|             INTJ|
|Because we're pre...|             INFJ|
|                Istp|             ISTP|
|My part of our be...| ENFP, ESTJ, ENTP|
|Entp actually, bu...|             ENTP|
|infp here and it'...|             INFP|
|               ISFP?|             ISFP|
|ENFP- I just clic...|       ENFP, ENFP|
|As a user of both...|       INTJ, ISTP|
|You are an INTJ w...|       INTJ, INFP|
|You really made a...|             INTJ|
|Am I your INFJ 4w...|       INFJ, INTP|
|hugs you but is s...|             INTJ|
|Oh please do my f...|             INFJ|
|Not related to my...|             ISFJ|
| My INTP brother too|             INTP|
+--------------------+-----------------+
only showing top

In [15]:
# Export the first five filtered comments as a sample table.
arm_comment_head_pd = arm_comment.limit(5).toPandas()
arm_comment_head_pd.to_csv(
    "Users/ml2078/fall-2023-reddit-project-team-10/data/csv/arm_comment_head.csv",
    index=False,
)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 18, 20, Finished, Available)

In [16]:
# Persist the filtered comments to blob storage, overwriting any earlier copy.
arm_comment.write.mode("overwrite").parquet(f"{workspace_wasbs_base_url}/arm_comment.parquet")

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 18, 21, Finished, Available)

### Read the comments data

In [2]:
# Storage settings are repeated here; the execution counter restarts at this
# cell, so this part was presumably run in a fresh session.
workspace_default_storage_account = "group10astorage46582e02e"
workspace_default_container = "azureml-blobstore-e8a18b52-3288-4d1f-9f32-d5a9249c2c0e"
workspace_wasbs_base_url = f"wasbs://{workspace_default_container}@{workspace_default_storage_account}.blob.core.windows.net/"

# Reload the filtered comments saved by the preprocessing step.
arm_comment_path = f"{workspace_wasbs_base_url}/arm_comment.parquet"
arm_comment = spark.read.parquet(arm_comment_path)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 7, Finished, Available)

In [3]:
# Preview the reloaded comments.
arm_comment.show()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 8, Finished, Available)

+--------------------+-----------------+
|        comment_text|mbti_type_related|
+--------------------+-----------------+
|I think you may b...|       ENFP, ENFP|
|                isfp|             ISFP|
|       ENTJ, or ESTJ|       ENTJ, ESTJ|
|ENFP. (Sing in Bi...|             ENFP|
|                Intj|             INTJ|
|Because we're pre...|             INFJ|
|                Istp|             ISTP|
|My part of our be...| ENFP, ESTJ, ENTP|
|Entp actually, bu...|             ENTP|
|infp here and it'...|             INFP|
|               ISFP?|             ISFP|
|ENFP- I just clic...|       ENFP, ENFP|
|As a user of both...|       INTJ, ISTP|
|You are an INTJ w...|       INTJ, INFP|
|You really made a...|             INTJ|
|Am I your INFJ 4w...|       INFJ, INTP|
|hugs you but is s...|             INTJ|
|Oh please do my f...|             INFJ|
|Not related to my...|             ISFJ|
| My INTP brother too|             INTP|
+--------------------+-----------------+
only showing top

### Read the external data

In [4]:
import pandas as pd

# Load the external MBTI posts dataset and print it.
mbti_in_post_path = "Users/xx123/fall-2023-reddit-project-team-10/data/csv/mbti_in_post.csv"
mbti_in_post_df = pd.read_csv(mbti_in_post_path)
print(mbti_in_post_df)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 9, Finished, Available)

       type                                               post mbti_in_post
0      INFJ  enfp and intj moments    sportscenter not top ...   ENFP, INTJ
1      INFJ  The last thing my INFJ friend posted on his fa...         INFJ
2      INFJ  Hello ENFJ7. Sorry to hear of your distress. I...         ENFJ
3      INFJ  Dear ENFP:  What were your favorite video game...         ENFP
4      INFJ  Yo entp ladies... if you're into a complimenta...         ENTP
...     ...                                                ...          ...
80383  INFP  Ok Mr. ENFJ, I have a conundrum for you.  I am...         ENFJ
80384  INFP  I greatly appreciate your input.  What you and...         ENFJ
80385  INFP  I don't know for sure that he is an ENFJ but i...         ENFJ
80386  INFP  I can't honestly see the point in saying which...         INTP
80387  INFP  Just interested to know what some other INFJs'...         INFJ

[80388 rows x 3 columns]


In [5]:
# Check the number of rows in the external MBTI posts dataset.
len(mbti_in_post_df)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 10, Finished, Available)

80388

In [6]:
# Keep the post text and its extracted types, renamed to the column names
# used by arm_comment (comment_text, mbti_type_related).
mbti_in_post_df = mbti_in_post_df[["post", "mbti_in_post"]].rename(
    columns={"post": "comment_text", "mbti_in_post": "mbti_type_related"}
)
mbti_in_post_df

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 11, Finished, Available)

Unnamed: 0,comment_text,mbti_type_related
0,enfp and intj moments sportscenter not top ...,"ENFP, INTJ"
1,The last thing my INFJ friend posted on his fa...,INFJ
2,Hello ENFJ7. Sorry to hear of your distress. I...,ENFJ
3,Dear ENFP: What were your favorite video game...,ENFP
4,Yo entp ladies... if you're into a complimenta...,ENTP
...,...,...
80383,"Ok Mr. ENFJ, I have a conundrum for you. I am...",ENFJ
80384,I greatly appreciate your input. What you and...,ENFJ
80385,I don't know for sure that he is an ENFJ but i...,ENFJ
80386,I can't honestly see the point in saying which...,INTP


### Merge the datasets

In [7]:
from pyspark.sql import SparkSession

# Convert the pandas DataFrame to a PySpark DataFrame; it is unioned with arm_comment below.
mbti_in_post_spark = spark.createDataFrame(mbti_in_post_df)


StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 12, Finished, Available)

In [8]:
# Preview the external posts after conversion to a Spark DataFrame.
mbti_in_post_spark.show()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 13, Finished, Available)

+--------------------+-----------------+
|        comment_text|mbti_type_related|
+--------------------+-----------------+
|enfp and intj mom...|       ENFP, INTJ|
|The last thing my...|             INFJ|
|Hello ENFJ7. Sorr...|             ENFJ|
|Dear ENFP:  What ...|             ENFP|
|Yo entp ladies......|             ENTP|
|a pokemon world  ...|             INFJ|
|I like this perso...|             INTJ|
|Hello *ENTP Grin*...|             ENTP|
|You know you're a...|             ENTP|
|No; The way he co...|             ENTP|
|Sherlock in the m...|       ENTP, ESTJ|
|TBH, and biased, ...|       INFP, ESTJ|
|*Checks list* I'm...|             INFJ|
|I'm ANTP (Leaning...|       ENTP, INTP|
|I also imagine EN...|             ENTP|
|Damn, need to tru...|             INFP|
|I bow to my entp ...|             ENTP|
|Personal opinion ...|       INTJ, INTP|
|This is the most ...|             INTP|
|INTJ Recently I s...|             INTJ|
+--------------------+-----------------+
only showing top

In [16]:
# Stack the Reddit comments and the external posts, matching columns by name.
combined_df = arm_comment.unionByName(mbti_in_post_spark)
combined_df.printSchema()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 21, Finished, Available)

root
 |-- comment_text: string (nullable = true)
 |-- mbti_type_related: string (nullable = true)



In [18]:
# Count and print the number of rows in the combined dataset.
row_count = combined_df.count()
print("Number of rows:", row_count)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 23, Finished, Available)

Number of rows: 540780


In [14]:
# Preview the first ten combined rows as a pandas DataFrame.
# Only ten rows are collected to the driver: the preview and the CSV export
# both use head(10), so pulling every row through toPandas() is unnecessary.
# NOTE(review): Spark does not guarantee row order without an explicit sort,
# so which ten rows come back may differ from a full collect — confirm.
combined_df_csv = combined_df.limit(10).toPandas()
combined_df_csv.head(10)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 19, Finished, Available)

Unnamed: 0,comment_text,mbti_type_related
0,I think you may be an ENFP\n\nNe: dominant fun...,"ENFP, ENFP"
1,isfp,ISFP
2,"ENTJ, or ESTJ","ENTJ, ESTJ"
3,ENFP. (Sing in Bill Nye the science guy’s them...,ENFP
4,Intj,INTJ
5,Because we're pretty open minded despite appea...,INFJ
6,Istp,ISTP
7,My part of our bedroom- Colorful and relativel...,"ENFP, ESTJ, ENTP"
8,"Entp actually, but you were close",ENTP
9,infp here and it's very messy. i can't stand i...,INFP


In [15]:
# Save the ten-row preview so the table can be shown on the website.
combined_preview = combined_df_csv.head(10)
combined_preview.to_csv("Users/xx123/fall-2023-reddit-project-team-10/data/csv/combined_text_arm.csv")

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 20, Finished, Available)

In [19]:
# Cache the combined DataFrame; the FP-Growth cells below reuse it.
combined_df.cache()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 24, Finished, Available)

DataFrame[comment_text: string, mbti_type_related: string]

## Build FP-Growth model

In [20]:
from pyspark.sql.functions import split

# Turn the ", "-separated type string into an array of types.
# NOTE(review): this overwrites the string column in place, so the cell is
# presumably not safe to run twice on the same combined_df.
type_items = split(combined_df["mbti_type_related"], ", ")
combined_df = combined_df.withColumn("mbti_type_related", type_items)


StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 25, Finished, Available)

In [21]:
from pyspark.sql.functions import array_distinct

# Remove repeated types within each row (e.g. "ENFP, ENFP"), so every array
# holds each type at most once before it is passed to FP-Growth.
unique_types = array_distinct("mbti_type_related")
combined_df = combined_df.withColumn("mbti_type_related", unique_types)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 26, Finished, Available)

In [22]:
from pyspark.ml.fpm import FPGrowth

# Configure FP-Growth over the per-row arrays of MBTI types.
fpGrowth = FPGrowth(
    itemsCol="mbti_type_related",
    minSupport=0.01,
    minConfidence=0.1,
)

# Fit the model on the combined Reddit + external data.
model = fpGrowth.fit(combined_df)

# Show the frequent itemsets that were found.
model.freqItemsets.show()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 27, Finished, Available)



+------------+-----+
|       items| freq|
+------------+-----+
|      [INFP]|96834|
|      [INTP]|89646|
|[INTP, INFP]|11639|
|      [INTJ]|83758|
|[INTJ, INTP]|12858|
|[INTJ, INFP]| 8479|
|      [INFJ]|78067|
|[INFJ, INTJ]|10971|
|[INFJ, INTP]| 8329|
|[INFJ, INFP]|12142|
|      [ENTP]|72500|
|[ENTP, INTJ]| 8368|
|[ENTP, INTP]|11257|
|[ENTP, INFJ]| 7251|
|[ENTP, INFP]| 7095|
|      [ENFP]|59497|
|[ENFP, INTJ]| 6772|
|[ENFP, ENTP]| 9222|
|[ENFP, INTP]| 6413|
|[ENFP, INFJ]| 7326|
+------------+-----+
only showing top 20 rows



In [27]:
# Collect the frequent itemsets into a pandas DataFrame and display them.
frequent_itemsets_df = model.freqItemsets.toPandas()
frequent_itemsets_df

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 32, Finished, Available)



Unnamed: 0,items,freq
0,[INFP],96834
1,[INTP],89646
2,"[INTP, INFP]",11639
3,[INTJ],83758
4,"[INTJ, INTP]",12858
5,"[INTJ, INFP]",8479
6,[INFJ],78067
7,"[INFJ, INTJ]",10971
8,"[INFJ, INTP]",8329
9,"[INFJ, INFP]",12142


In [46]:
# Count the frequent itemsets.
model.freqItemsets.count()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 19, 51, Finished, Available)


Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.



41

In [28]:
# Save the frequent itemsets as CSV, with a header row.
frequent_itemsets_df.to_csv("Users/xx123/fall-2023-reddit-project-team-10/data/csv/fp_growth_freqitemsets.csv", header=True)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 33, Finished, Available)

In [29]:
# Inspect the association rules derived from the frequent itemsets.
model.associationRules.show()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 34, Finished, Available)



+----------+----------+-------------------+------------------+--------------------+
|antecedent|consequent|         confidence|              lift|             support|
+----------+----------+-------------------+------------------+--------------------+
|    [ISTP]|    [INTJ]| 0.1345528832760702| 0.868735024929359|0.010717851991567735|
|    [ISTP]|    [INTP]|0.18070387222583342|1.0900769696616268|0.014394023447612708|
|    [ENTJ]|    [INTJ]|0.18758659558978935|1.2111449552645275|0.014771256333444284|
|    [ENTJ]|    [ENTP]|0.15060000469670995|1.1233306281363697|0.011858796553126964|
|    [ENTJ]|    [INTP]|0.13655684193222647|0.8237646853190264|0.010752986427012834|
|    [INFJ]|    [INTJ]|0.14053313179704613|0.9073462476803005| 0.02028736269832464|
|    [INFJ]|    [INTP]|0.10669040695812572|0.6435985796891688|0.015401826990643144|
|    [INFJ]|    [INFP]|0.15553306774949724|0.8685913251293257| 0.02245275343023041|
|    [INFP]|    [INTP]|0.12019538591816924|0.7250659348641051|0.021522615481

In [30]:
# Count the association rules.
model.associationRules.count()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 35, Finished, Available)

31

Throughout this analysis, our goal is to estimate the probability that another type is mentioned given that one type (for example, ISTP) is known to occur, so we rank the rules by confidence here.

In [31]:
from pyspark.sql.functions import col

# Rank the association rules from highest to lowest confidence.
conf_rules = model.associationRules.orderBy("confidence", ascending=False)

# Display the top of the ranking.
conf_rules.show()


StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 36, Finished, Available)

+----------+----------+-------------------+------------------+--------------------+
|antecedent|consequent|         confidence|              lift|             support|
+----------+----------+-------------------+------------------+--------------------+
|    [ISFP]|    [INFP]|0.23514518300108966|1.3131938375294758|0.013567439624246459|
|    [ENFJ]|    [INFJ]|0.20289948870623262|1.4055104654022377|0.011594363696882281|
|    [ESTP]|    [ENTP]| 0.1989766331229746|1.4841735677274785| 0.01078627168164503|
|    [ISTJ]|    [INTJ]|0.19597530299565516|1.2653062913869768|0.011093235696586412|
|    [ENTJ]|    [INTJ]|0.18758659558978935|1.2111449552645275|0.014771256333444284|
|    [ENFJ]|    [INFP]| 0.1825124587405346|1.0192606670973656|0.010429379784755353|
|    [ISTP]|    [INTP]|0.18070387222583342|1.0900769696616268|0.014394023447612708|
|    [ENFP]|    [INFP]|0.17002537943089568|0.9495252151996176| 0.01870631310329524|
|    [INFJ]|    [INFP]|0.15553306774949724|0.8685913251293257| 0.02245275343

Lift is also a good measure of whether a rule is valid and genuinely relevant, so we rank the rules by lift in descending order as well.

In [32]:
# Rank the association rules from highest to lowest lift.
lift_rules = model.associationRules.orderBy("lift", ascending=False)

# Display the top of the ranking.
lift_rules.show()

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 37, Finished, Available)

+----------+----------+-------------------+------------------+--------------------+
|antecedent|consequent|         confidence|              lift|             support|
+----------+----------+-------------------+------------------+--------------------+
|    [ESTP]|    [ENTP]| 0.1989766331229746|1.4841735677274785| 0.01078627168164503|
|    [ENFJ]|    [INFJ]|0.20289948870623262|1.4055104654022377|0.011594363696882281|
|    [ISFP]|    [INFP]|0.23514518300108966|1.3131938375294758|0.013567439624246459|
|    [ISTJ]|    [INTJ]|0.19597530299565516|1.2653062913869768|0.011093235696586412|
|    [ENTJ]|    [INTJ]|0.18758659558978935|1.2111449552645275|0.014771256333444284|
|    [ENFP]|    [ENTP]|0.15499941173504547|1.1561459569390053| 0.01705314545656274|
|    [ENTP]|    [ENFP]|             0.1272|1.1561459569390053| 0.01705314545656274|
|    [ENTJ]|    [ENTP]|0.15060000469670995|1.1233306281363697|0.011858796553126964|
|    [ISTP]|    [INTP]|0.18070387222583342|1.0900769696616268|0.014394023447

### Choosing a ranking metric

For general association rule mining, confidence and lift are usually the most popular metrics. If your goal is to discover rules that are strongly related but may not be as obvious, lift is a good choice. If you are concerned with the strength and reliability of the rules, then ranking by confidence is appropriate. If you want to find the most common patterns of association in your data set, then consider sorting by support.


In [33]:
import pandas as pd

# Collect the confidence-ranked rules into a pandas DataFrame and display them.
rules_pd = conf_rules.toPandas()
rules_pd

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 38, Finished, Available)

Unnamed: 0,antecedent,consequent,confidence,lift,support
0,[ISFP],[INFP],0.235145,1.313194,0.013567
1,[ENFJ],[INFJ],0.202899,1.40551,0.011594
2,[ESTP],[ENTP],0.198977,1.484174,0.010786
3,[ISTJ],[INTJ],0.195975,1.265306,0.011093
4,[ENTJ],[INTJ],0.187587,1.211145,0.014771
5,[ENFJ],[INFP],0.182512,1.019261,0.010429
6,[ISTP],[INTP],0.180704,1.090077,0.014394
7,[ENFP],[INFP],0.170025,0.949525,0.018706
8,[INFJ],[INFP],0.155533,0.868591,0.022453
9,[ENTP],[INTP],0.155269,0.936644,0.020816


In [56]:
# Save the confidence-ranked rules as CSV; the plotting section later reads a
# file with this name, presumably this same export.
rules_pd.to_csv("Users/xx123/fall-2023-reddit-project-team-10/data/csv/ordered_association_rules.csv")

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 19, 61, Finished, Available)

In [35]:
# Collect the lift-ranked rules into pandas and export them to CSV.
lift_pd = lift_rules.toPandas()
lift_pd.to_csv(
    "Users/xx123/fall-2023-reddit-project-team-10/data/csv/lift_association_rules.csv"
)

StatementMeta(80dcc4b2-bc50-4e81-91e5-397b7f13252e, 26, 40, Finished, Available)

## Plot the Association Rule Network Graph

In [2]:
import pandas as pd
# Load the confidence-ranked association rules from CSV (presumably the file exported earlier).
rules_pd=pd.read_csv("../data/csv/ordered_association_rules.csv")

In [9]:
import plotly.graph_objs as go
import networkx as nx

# Build a directed graph with one edge per rule (antecedent -> consequent),
# storing the rule's confidence as the edge weight.
# NOTE(review): rules_pd is read back from CSV, so the antecedent/consequent
# cells are presumably already strings that include list brackets; the node
# labels therefore keep those brackets — confirm this is intended.
G = nx.DiGraph()
for _, row in rules_pd.iterrows():
    G.add_edge(str(row['antecedent']), str(row['consequent']), weight=row['confidence'])

# Fixed seed so the force-directed layout, and therefore the figure, is
# reproducible between runs.
pos = nx.spring_layout(G, seed=42)

# Edge trace: each edge is a two-point segment; None breaks the line between edges.
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

# Node coordinates, in the graph's node order.
node_x = [pos[node][0] for node in G.nodes()]
node_y = [pos[node][1] for node in G.nodes()]

# Label and colour value for each node. For a DiGraph, adjacency() yields each
# node with its successors, so the colour value is the number of outgoing
# edges (rules in which the node is the antecedent).
node_text = []
node_adjacencies = []
for node_name, successors in G.adjacency():
    node_text.append(f'{node_name}')
    node_adjacencies.append(len(successors))

color_list=['#081d58','#253795','#1f80b8','#97d6b9','#ffffd9']

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',  # show the label next to each marker
    text=node_text,
    textposition="bottom center",
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale=color_list,
        reversescale=True,
        color=node_adjacencies,
        size=15,
        colorbar=dict(
            thickness=15,
            # title.side replaces the deprecated colorbar `titleside` property.
            title=dict(text='Number of Node Connections', side='right'),
            xanchor='left'
        ),
        line_width=1.5))

# Assemble the figure.
fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                # title.font replaces the deprecated layout `titlefont` property.
                title=dict(text='<br>Network graph of association rules',
                           font=dict(size=23)),
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=80),
                annotations=[dict(
                    text="Python plotly library",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002)],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                )

fig.show()
