In [0]:
from pyspark.sql.functions import (
    avg,
    col,
    count,
    explode,
    lower,
    regexp_replace,
    size,
    when,
)
# Path of the silver-layer tweets table that the cells below aggregate.
source = '/Volumes/workspace/default/my_volume/Sentiment_Project/enhancements/silver_tweets'

# Read the Delta table stored at `source` into a DataFrame.
silver_df = spark.read.load(source, format='delta')

In [0]:
# Show at most ten silver rows whose Has_Brand_mention value is greater than zero.
display(
    silver_df
    .where(col('Has_Brand_mention') > 0)
    .limit(10)
)

## **Multi-dimensional Gold Tables**

### **1. Geographic Analysis**

In [0]:
# Gold table: sentiment of brand-mentioning rows by country.
# Produces one row per (Country, Sentiment) pair with the row count and the
# mean Sentiment_score of that group.
#
# Sentiment_score is deliberately NOT a grouping key: grouping by the same
# column that is being averaged puts a single score value in every group, so
# Average_Sentiment would merely repeat Sentiment_score instead of summarising it.
#
# NOTE(review): a table previously written at this path with a Sentiment_score
# column may keep that column (filled with nulls) because the write only merges
# schemas; use the 'overwriteSchema' option if it should be dropped — confirm
# against downstream readers first.
geo_sentiment = (
    silver_df
    .filter(col('Has_Brand_mention') > 0)
    .groupBy('Country', 'Sentiment')
    .agg(
        count('*').alias('Count'),
        avg('Sentiment_score').alias('Average_Sentiment'),
    )
)

# Replace the stored table contents with the freshly computed aggregate.
geo_sentiment.write.format('delta').mode('overwrite').option('mergeSchema', 'true').save('/Volumes/workspace/default/my_volume/Sentiment_Project/enhancements/gold/geo_sentiment')

### **2. Age Group Analysis**

In [0]:
# Gold table: row counts of brand-mentioning rows (Has_Brand_mention == 1)
# for every (Age_group, Sentiment, Country, Sentiment_score) combination.
age_sentiment = (
    silver_df
    .where(col('Has_Brand_mention') == 1)
    .groupBy('Age_group', 'Sentiment', 'Country', 'Sentiment_score')
    .agg(count('*').alias('Count'))
)

# Overwrite the Delta table at the gold path with the new aggregate.
age_sentiment.write.save(
    '/Volumes/workspace/default/my_volume/Sentiment_Project/enhancements/gold/age_sentiment',
    format='delta',
    mode='overwrite',
)

### **3. Time Analysis**

In [0]:
# Gold table: row counts of brand-mentioning rows (Has_Brand_mention == 1)
# for every (Time_category, Sentiment, Country, Sentiment_score) combination.
time_sentiment = (
    silver_df
    .where(col('Has_Brand_mention') == 1)
    .groupBy(['Time_category', 'Sentiment', 'Country', 'Sentiment_score'])
    .agg(count('*').alias('Count'))
)

# Overwrite the Delta table at the gold path with the new aggregate.
time_sentiment.write.save(
    '/Volumes/workspace/default/my_volume/Sentiment_Project/enhancements/gold/time_sentiment',
    format='delta',
    mode='overwrite',
)

### **4. Brand + Demographic _Cross-Analysis_**

In [0]:
# Gold table: brand x demographic cross-analysis.
# `explode` (imported with the other pyspark.sql.functions names in the
# notebook's import cell) turns each element of Detected_brands into its own
# row under the new 'Brand' column, so a row listing several brands adds one
# to the count of each of them.
brand_demo_sentiment = (
    silver_df
    .filter(col('Has_Brand_mention') == 1)
    .withColumn('Brand', explode(col('Detected_brands')))
    .groupBy('Brand', 'Age_group', 'Time_category', 'Country', 'Sentiment', 'Sentiment_score')
    .agg(count('*').alias('Count'))
)

# Overwrite the Delta table at the gold path with the new aggregate.
brand_demo_sentiment.write.format('delta').mode('overwrite').save('/Volumes/workspace/default/my_volume/Sentiment_Project/enhancements/gold/brand_demo_sentiment')

In [0]:
# Show at most one hundred rows of the brand/demographic aggregate.
display(
    brand_demo_sentiment
    .limit(100)
)