## 2.1

In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import col, lower, when, dayofweek, to_date, lit, avg, from_unixtime, unix_timestamp, array_contains, split, first, countDistinct, count, explode
from pyspark.sql import SparkSession
import re

spark = SparkSession.builder.appName("DDMS2024_Project1").getOrCreate()
sc = spark.sparkContext

# This dict holds the correct schemata for easily loading the CSVs
schemas_dict = {
    'Daily program data': StructType([
        StructField('prog_code', StringType()),
        StructField('title', StringType()),
        StructField('genre', StringType()),
        StructField('air_date', StringType()),
        StructField('air_time', StringType()),
        StructField('Duration', FloatType())
    ]),
    'viewing': StructType([
        StructField('device_id', StringType()),
        StructField('event_date', StringType()),
        StructField('event_time', IntegerType()),
        StructField('mso_code', StringType()),
        StructField('prog_code', StringType()),
        StructField('station_num', StringType())
    ]),
    'viewing_full': StructType([
        StructField('mso_code', StringType()),
        StructField('device_id', StringType()),
        StructField('event_date', IntegerType()),
        StructField('event_time', IntegerType()),
        StructField('station_num', StringType()),
        StructField('prog_code', StringType())
    ]),
    'demographic': StructType([
        StructField('household_id', StringType()),
        StructField('household_size', IntegerType()),
        StructField('num_adults', IntegerType()),
        StructField('num_generations', IntegerType()),
        StructField('adult_range', StringType()),
        StructField('marital_status', StringType()),
        StructField('race_code', StringType()),
        StructField('presence_children', StringType()),
        StructField('num_children', IntegerType()),
        StructField('age_children', StringType()),  # format like range - 'bitwise'
        StructField('age_range_children', StringType()),
        StructField('dwelling_type', StringType()),
        StructField('home_owner_status', StringType()),
        StructField('length_residence', IntegerType()),
        StructField('home_market_value', StringType()),
        StructField('num_vehicles', IntegerType()),
        StructField('vehicle_make', StringType()),
        StructField('vehicle_model', StringType()),
        StructField('vehicle_year', IntegerType()),
        StructField('net_worth', IntegerType()),
        StructField('income', StringType()),
        StructField('gender_individual', StringType()),
        StructField('age_individual', IntegerType()),
        StructField('education_highest', StringType()),
        StructField('occupation_highest', StringType()),
        StructField('education_1', StringType()),
        StructField('occupation_1', StringType()),
        StructField('age_2', IntegerType()),
        StructField('education_2', StringType()),
        StructField('occupation_2', StringType()),
        StructField('age_3', IntegerType()),
        StructField('education_3', StringType()),
        StructField('occupation_3', StringType()),
        StructField('age_4', IntegerType()),
        StructField('education_4', StringType()),
        StructField('occupation_4', StringType()),
        StructField('age_5', IntegerType()),
        StructField('education_5', StringType()),
        StructField('occupation_5', StringType()),
        StructField('polit_party_regist', StringType()),
        StructField('polit_party_input', StringType()),
        StructField('household_clusters', StringType()),
        StructField('insurance_groups', StringType()),
        StructField('financial_groups', StringType()),
        StructField('green_living', StringType())
    ])
}

# Read a CSV into a dataframe
def load_csv_file(filename, schema):
    allowed_files = {
        'Daily program data': ('Daily program data', "|"),
        'demographic': ('demographic', "|")
    }

    if filename not in allowed_files.keys():
        print(f'You were trying to access unknown file \"{filename}\". Only valid options are {allowed_files.keys()}')
        return None

    filepath = allowed_files[filename][0]
    dataPath = f"dbfs:/mnt/coursedata2024/fwm-stb-data/{filepath}"
    delimiter = allowed_files[filename][1]

    df = spark.read.format("csv")\
        .option("header", "false")\
        .option("delimiter", delimiter)\
        .schema(schema)\
        .load(dataPath)
    return df


# Load the data
demographic_data = load_csv_file('demographic', schemas_dict['demographic'])
daily_program_data = load_csv_file('Daily program data', schemas_dict['Daily program data'])
reference_data = spark.read.parquet('dbfs:/refxml_new_parquet')
dataPath = f"dbfs:/viewing_10M"
program_viewing_data = spark.read.format("csv")\
    .option("header", "true")\
    .option("delimiter", ",")\
    .schema(schemas_dict['viewing_full'])\
    .load(dataPath)

# Filter out 'Unknown' DMA
reference_data = reference_data.filter(reference_data.dma != 'Unknown')

# Get top 10 DMAs by number of devices
top_dmas = (reference_data.groupBy("dma")
            .agg(count("device_id").alias("device_count"))
            .orderBy(col("device_count").desc())
            .limit(10)
            .collect())

top_dma_names = [row['dma'] for row in top_dmas]

# Explode genres into individual rows
daily_program_data = daily_program_data.withColumn("genre", explode(split(col("genre"), ",")))

# Join dataframes to get DMA and genres together
viewing_with_dma = (program_viewing_data.join(reference_data, "device_id")
                    .join(daily_program_data, "prog_code")
                    .select("dma", "genre"))

# Function to sanitize DMA name
def sanitize_dma_name(dma_name):
    # Replace spaces with underscores and remove special characters
    return re.sub(r'\W+', '', dma_name.replace(" ", "_"))

# Function to create and save genre-popularity dataframe for each DMA
def create_genre_popularity_df(dma_name):
    genre_popularity = (viewing_with_dma.filter(col("dma") == dma_name)
                        .groupBy("genre")
                        .agg(count("genre").alias("popularity"))
                        .orderBy(col("popularity").desc()))
    return genre_popularity


#Our ids
id1 = "324075548"
id2 = "326172756"

# Display sample DataFrames for all top 10 DMAs
for dma_name in top_dma_names:
    sanitized_dma_name = sanitize_dma_name(dma_name)
    file_name = f"project1_part21_{sanitized_dma_name}_{id1}_{id2}"
    print(f"Data for: {file_name}")
    sample_df = create_genre_popularity_df(dma_name)
    display(sample_df)
    print("\n")

# Show the top 10 genres for the 1st, 5th, and 9th largest DMAs
for index in [0, 4, 8]:
    dma_name = top_dma_names[index]
    print(f"Top 10 genres for DMA: {dma_name}")
    create_genre_popularity_df(dma_name).show(10, truncate=False)


Data for: project1_part21_Wilkes_BarreScrantonHztn_324075548_326172756


genre,popularity
Shopping,261219839
News,148852784
Sports non-event,85822616
Sports talk,85567230
Weather,64017959
Consumer,56866440
Music,49066687
Special,11731614
Variety,10097387
Talk,8871315




Data for: project1_part21_CharlestonHuntington_324075548_326172756


genre,popularity
Shopping,600951304
News,177107438
Sports non-event,127370002
Sports talk,124124718
Consumer,80987076
Music,51516063
Community,36293977
Public affairs,34083231
Variety,29858148
Special,17404367




Data for: project1_part21_SeattleTacoma_324075548_326172756


genre,popularity
Shopping,242530329
News,103858008
Consumer,64741442
Sports non-event,52226020
Sports talk,51220421
Music,24606550
Special,6603163
Talk,6513109
Religious,5346312
Sitcom,3085499




Data for: project1_part21_Toledo_324075548_326172756


genre,popularity
Shopping,87077505
News,53445738
Sports non-event,32979662
Sports talk,31346200
Consumer,20058315
Music,5736974
Special,4019246
Weather,2222742
Talk,1987413
Religious,1689615




Data for: project1_part21_Little_RockPine_Bluff_324075548_326172756


genre,popularity
Shopping,329229821
News,72654700
Sports non-event,46145770
Sports talk,44502530
Consumer,38487341
Music,12236128
Special,9423872
Religious,4977461
Talk,4524141
Sitcom,3377080




Data for: project1_part21_Amarillo_324075548_326172756


genre,popularity
Shopping,580463283
News,86429073
Sports non-event,56444819
Sports talk,55209189
Consumer,43626934
Home improvement,13349425
Music,11279782
Special,7230447
Religious,5858670
Talk,5745506




Data for: project1_part21_Bend_OR_324075548_326172756


genre,popularity
Shopping,370453913
News,87146042
Sports non-event,49062494
Sports talk,46129722
Consumer,40658385
Music,12415461
Special,9979323
Talk,6616623
Politics,4330463
Sitcom,4323627




Data for: project1_part21_GreenvilleNBernWashngtn_324075548_326172756


genre,popularity
Shopping,223848590
News,104028390
Sports non-event,74970993
Sports talk,73776971
Consumer,36821044
Music,28789805
Variety,9655647
Religious,7721519
Special,6479778
Children,6478107




Data for: project1_part21_Washington_DC_Hagrstwn_324075548_326172756


genre,popularity
Shopping,127192752
News,40869644
Sports non-event,30894549
Sports talk,29481797
Consumer,23704559
Music,9370470
Special,3695182
Religious,2361376
Talk,2361299
Sitcom,1729578




Data for: project1_part21_Houston_324075548_326172756


genre,popularity
Shopping,193665263
News,61742339
Consumer,43925123
Sports talk,36959975
Sports non-event,36889521
Music,8321993
Special,5598059
Talk,4216699
Religious,3945243
Children,2987073




Top 10 genres for DMA: Wilkes Barre-Scranton-Hztn
+----------------+----------+
|genre           |popularity|
+----------------+----------+
|Shopping        |261219839 |
|News            |148852784 |
|Sports non-event|85822616  |
|Sports talk     |85567230  |
|Weather         |64017959  |
|Consumer        |56866440  |
|Music           |49066687  |
|Special         |11731614  |
|Variety         |10097387  |
|Talk            |8871315   |
+----------------+----------+
only showing top 10 rows

Top 10 genres for DMA: Little Rock-Pine Bluff
+----------------+----------+
|genre           |popularity|
+----------------+----------+
|Shopping        |329229821 |
|News            |72654700  |
|Sports non-event|46145770  |
|Sports talk     |44502530  |
|Consumer        |38487341  |
|Music           |12236128  |
|Special         |9423872   |
|Religious       |4977461   |
|Talk            |4524141   |
|Sitcom          |3377080   |
+----------------+----------+
only showing top 10 rows

Top 10 gen

## 2.2

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import *
from pyspark.sql.window import Window
import re
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("DDMS2024_Project1").getOrCreate()
sc = spark.sparkContext

# This dict holds the correct schemata for easily loading the CSVs
schemas_dict = {
    'Daily program data': StructType([
        StructField('prog_code', StringType()),
        StructField('title', StringType()),
        StructField('genre', StringType()),
        StructField('air_date', StringType()),
        StructField('air_time', StringType()),
        StructField('Duration', FloatType())
    ]),
    'viewing': StructType([
        StructField('device_id', StringType()),
        StructField('event_date', StringType()),
        StructField('event_time', IntegerType()),
        StructField('mso_code', StringType()),
        StructField('prog_code', StringType()),
        StructField('station_num', StringType())
    ]),
    'viewing_full': StructType([
        StructField('mso_code', StringType()),
        StructField('device_id', StringType()),
        StructField('event_date', IntegerType()),
        StructField('event_time', IntegerType()),
        StructField('station_num', StringType()),
        StructField('prog_code', StringType())
    ]),
    'demographic': StructType([
        StructField('household_id', StringType()),
        StructField('household_size', IntegerType()),
        StructField('num_adults', IntegerType()),
        StructField('num_generations', IntegerType()),
        StructField('adult_range', StringType()),
        StructField('marital_status', StringType()),
        StructField('race_code', StringType()),
        StructField('presence_children', StringType()),
        StructField('num_children', IntegerType()),
        StructField('age_children', StringType()),  # format like range - 'bitwise'
        StructField('age_range_children', StringType()),
        StructField('dwelling_type', StringType()),
        StructField('home_owner_status', StringType()),
        StructField('length_residence', IntegerType()),
        StructField('home_market_value', StringType()),
        StructField('num_vehicles', IntegerType()),
        StructField('vehicle_make', StringType()),
        StructField('vehicle_model', StringType()),
        StructField('vehicle_year', IntegerType()),
        StructField('net_worth', IntegerType()),
        StructField('income', StringType()),
        StructField('gender_individual', StringType()),
        StructField('age_individual', IntegerType()),
        StructField('education_highest', StringType()),
        StructField('occupation_highest', StringType()),
        StructField('education_1', StringType()),
        StructField('occupation_1', StringType()),
        StructField('age_2', IntegerType()),
        StructField('education_2', StringType()),
        StructField('occupation_2', StringType()),
        StructField('age_3', IntegerType()),
        StructField('education_3', StringType()),
        StructField('occupation_3', StringType()),
        StructField('age_4', IntegerType()),
        StructField('education_4', StringType()),
        StructField('occupation_4', StringType()),
        StructField('age_5', IntegerType()),
        StructField('education_5', StringType()),
        StructField('occupation_5', StringType()),
        StructField('polit_party_regist', StringType()),
        StructField('polit_party_input', StringType()),
        StructField('household_clusters', StringType()),
        StructField('insurance_groups', StringType()),
        StructField('financial_groups', StringType()),
        StructField('green_living', StringType())
    ])
}
# Function to Convert Base 14 to Base 10
def base14_to_base10(income_str):
    base14_chars = "0123456789ABCD"
    base10_value = 0
    for i, char in enumerate(reversed(income_str)):
        base10_value += base14_chars.index(char) * (14 ** i)
    return base10_value

# Register the function as a UDF
base14_to_base10_udf = udf(base14_to_base10, IntegerType())

# Load CSV file function
def load_csv_file(filename, schema):
    allowed_files = {
        'Daily program data': ('Daily program data', "|"),
        'demographic': ('demographic', "|")
    }
    if filename not in allowed_files.keys():
        raise ValueError(f'Invalid filename. Allowed options are {allowed_files.keys()}')

    filepath = allowed_files[filename][0]
    dataPath = f"dbfs:/mnt/coursedata2024/fwm-stb-data/{filepath}"
    delimiter = allowed_files[filename][1]

    return spark.read.format("csv") \
        .option("header", "false") \
        .option("delimiter", delimiter) \
        .schema(schema) \
        .load(dataPath)


# Load data from th tables:
demographic_data = load_csv_file('demographic', schemas_dict['demographic']) \
    .select('household_id', 'net_worth', 'income').dropna()
daily_program_data = load_csv_file('Daily program data', schemas_dict['Daily program data']) \
    .select('prog_code', 'genre').dropna()
reference_data = spark.read.parquet('dbfs:/refxml_new_parquet') \
    .select('household_id', 'device_id', 'dma').dropna()
program_viewing_data = spark.read.csv("dbfs:/viewing_10M", schema=schemas_dict['viewing_full'], header=True) \
    .select('device_id', 'prog_code').dropna()
demographic_data = demographic_data.withColumn("income_numeric", base14_to_base10_udf(F.col("income"))).drop("income")


# Calculate wealth data - avgerage net worth and avg income for the formula later
wealth_data = demographic_data.join(reference_data, 'household_id') \
    .groupBy('dma') \
    .agg(F.avg('net_worth').alias('avg_net_worth'), F.avg('income_numeric').alias('avg_income'))


#calculate the max values for the formula
max_values = demographic_data.agg(F.max('net_worth').alias('max_net_worth'), F.max('income_numeric').alias('max_income')).collect()[0]


# Calculate wealth score by the formula (avg new worth/ max net worth)+(avg income/max income)
wealth_score = wealth_data \
    .withColumn('wealth_score', 
                (F.col('avg_net_worth') / max_values['max_net_worth'] + 
                 F.col('avg_income') / max_values['max_income']) / 2) \
    .select('dma', 'wealth_score') \
    .orderBy(F.desc('wealth_score')) #clac wealth score and order by descnding order of wealth score


# Calculate genre popularity - The window_spec is defined before it's used in the genre_popularity calculation
window_spec = Window.partitionBy('dma').orderBy(F.desc('popularity')) #partition by dma and rders the data within each partition by the popularity column in descending order

# First Join program_viewing_data with reference_data on device_id, then Join the result with daily_program_data on prog_code
# then Group by dma and genre, then count the occurrences of each dma and genre combination
# then Rename the count column to popularity, then Rank the genres by popularity within each DMA
genre_popularity = program_viewing_data.join(F.broadcast(reference_data), 'device_id') \
    .join(F.broadcast(daily_program_data), 'prog_code') \
    .groupBy('dma', 'genre') \
    .count() \
    .withColumnRenamed('count', 'popularity') \
    .withColumn('rank', F.rank().over(window_spec)) #join tables first then group by dma and genre then count popularity and rank as to popularity



# Function to clean DMA name for filename
def clean_dma_name(dma_name):
    # Replace spaces with underscores
    name = dma_name.replace(" ", "_")
    # Remove all other special characters
    return re.sub(r'[^a-zA-Z0-9_]', '', name)


# UDF for cleaning DMA names
clean_dma_udf = F.udf(clean_dma_name, StringType())

# Explode the genre list to get individual genres
exploded_genre_popularity = genre_popularity.withColumn('genre', F.explode(F.split(F.col('genre'), ',')))

# Initialize an empty set for the blacklist
blacklist = set()

# take only the top dma with the highest wealth score
top_10_dmas = wealth_score.limit(10)

# Process each DMA separately
for i, dma_row in enumerate(top_10_dmas.collect()):
    dma = dma_row['dma']
    wealth_score_value = dma_row['wealth_score'] #the wealth score
    
    # Filter for the DMA, then exclude genres in the blacklist, then remove Duplicate Genres 
    # then rank Genres by popularity, then Filter Top 11 Genres and select only the genre column
    dma_genres = exploded_genre_popularity.filter(F.col('dma') == dma) \
        .filter(~F.col('genre').isin(blacklist)) \
        .dropDuplicates(['genre']) \
        .withColumn('rank', F.rank().over(Window.partitionBy('dma').orderBy(F.desc('popularity')))) 

    # Filter to get the top 11 genres per DMA
    top_11_genres_per_dma = dma_genres.filter(F.col('rank') <= 11)
    # This ensures the result does not exceed 11 rows.
    top_11_genres_per_dma = top_11_genres_per_dma.limit(11)
    # Select only the genre column
    top_11_genres_only = top_11_genres_per_dma.select('genre')
    # Collect genres as a list
    genres_list = top_11_genres_only.rdd.flatMap(lambda row: row).collect()

    # Ensure we have exactly 11 unique genres
    if len(genres_list) < 11:
        # Fill the remaining slots with empty strings
        genres_list.extend([''] * (11 - len(genres_list)))

    blacklist.update(genres_list) # Update the blacklist with the newly selected genres
    
    # Create a DataFrame with one row per genre
    genre_rows = [(dma, wealth_score_value, genre) for genre in genres_list]
    dma_result = spark.createDataFrame(genre_rows, ['DMA NAME', 'WEALTH SCORE', 'ORDERED LIST OF GENRES'])

   
    # Clean DMA name for filename
    clean_name = clean_dma_name(dma)
    file_name = f"project1_part22_{clean_name}_324075548_326172756.csv" #file nameas requested
    
    # Display results
    print(f"\nData for: {file_name}")
    display(dma_result)
    
    # Display results for 1st, 5th, and 9th DMAs
    if i in [0, 4, 8]:
        genres = dma_result.select('ORDERED LIST OF GENRES').rdd.flatMap(lambda row: row).collect()
        print(f"\nTop genres for DMA {dma} (Wealth Score: {wealth_score_value}):") #print dma and its wealth score
        print(genres)# print the genres for each dma
   



Data for: project1_part22_San_Antonio_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
San Antonio,0.811965811965812,News
San Antonio,0.811965811965812,Weather
San Antonio,0.811965811965812,Sitcom
San Antonio,0.811965811965812,Newsmagazine
San Antonio,0.811965811965812,Comedy
San Antonio,0.811965811965812,Talk
San Antonio,0.811965811965812,Auto
San Antonio,0.811965811965812,Reality
San Antonio,0.811965811965812,Cooking
San Antonio,0.811965811965812,Drama



Top genres for DMA San Antonio (Wealth Score: 0.811965811965812):
['News', 'Weather', 'Sitcom', 'Newsmagazine', 'Comedy', 'Talk', 'Auto', 'Reality', 'Cooking', 'Drama', 'Western']

Data for: project1_part22_San_FranciscoOakSan_Jose_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
San Francisco-Oak-San Jose,0.7687827880931328,Shopping
San Francisco-Oak-San Jose,0.7687827880931328,Consumer
San Francisco-Oak-San Jose,0.7687827880931328,Religious
San Francisco-Oak-San Jose,0.7687827880931328,Sports talk
San Francisco-Oak-San Jose,0.7687827880931328,Comedy-drama
San Francisco-Oak-San Jose,0.7687827880931328,Variety
San Francisco-Oak-San Jose,0.7687827880931328,Anthology
San Francisco-Oak-San Jose,0.7687827880931328,Game show
San Francisco-Oak-San Jose,0.7687827880931328,Romance-comedy
San Francisco-Oak-San Jose,0.7687827880931328,Soap



Data for: project1_part22_Baltimore_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
Baltimore,0.7611870628817368,Football
Baltimore,0.7611870628817368,Musical
Baltimore,0.7611870628817368,Pro wrestling
Baltimore,0.7611870628817368,Horror
Baltimore,0.7611870628817368,Action sports
Baltimore,0.7611870628817368,Card games
Baltimore,0.7611870628817368,Poker
Baltimore,0.7611870628817368,Action
Baltimore,0.7611870628817368,Adventure
Baltimore,0.7611870628817368,Home improvement



Data for: project1_part22_SacramntoStktonModesto_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
Sacramnto-Stkton-Modesto,0.7405096974448204,Basketball
Sacramnto-Stkton-Modesto,0.7405096974448204,Art
Sacramnto-Stkton-Modesto,0.7405096974448204,Science
Sacramnto-Stkton-Modesto,0.7405096974448204,Boxing
Sacramnto-Stkton-Modesto,0.7405096974448204,Sports event
Sacramnto-Stkton-Modesto,0.7405096974448204,Nature
Sacramnto-Stkton-Modesto,0.7405096974448204,Educational
Sacramnto-Stkton-Modesto,0.7405096974448204,Medical
Sacramnto-Stkton-Modesto,0.7405096974448204,House/garden
Sacramnto-Stkton-Modesto,0.7405096974448204,Hockey



Data for: project1_part22_Bend_OR_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
"Bend, OR",0.7283595037647783,Community
"Bend, OR",0.7283595037647783,Entertainment
"Bend, OR",0.7283595037647783,Crime
"Bend, OR",0.7283595037647783,Law
"Bend, OR",0.7283595037647783,Special
"Bend, OR",0.7283595037647783,Animated
"Bend, OR",0.7283595037647783,Awards
"Bend, OR",0.7283595037647783,Travel
"Bend, OR",0.7283595037647783,Boat
"Bend, OR",0.7283595037647783,Suspense



Top genres for DMA Bend, OR (Wealth Score: 0.7283595037647783):
['Community', 'Entertainment', 'Crime', 'Law', 'Special', 'Animated', 'Awards', 'Travel', 'Boat', 'Suspense', 'Auto racing']

Data for: project1_part22_Houston_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
Houston,0.7177498034535345,Romance
Houston,0.7177498034535345,Public affairs
Houston,0.7177498034535345,Animals
Houston,0.7177498034535345,Historical drama
Houston,0.7177498034535345,War
Houston,0.7177498034535345,Musical comedy
Houston,0.7177498034535345,Self improvement
Houston,0.7177498034535345,Dance
Houston,0.7177498034535345,Crime drama
Houston,0.7177498034535345,Golf



Data for: project1_part22_Austin_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
Austin,0.7128035422259249,Politics
Austin,0.7128035422259249,Baseball
Austin,0.7128035422259249,Interview
Austin,0.7128035422259249,Fashion
Austin,0.7128035422259249,Auction
Austin,0.7128035422259249,Collectibles
Austin,0.7128035422259249,Paranormal
Austin,0.7128035422259249,Health
Austin,0.7128035422259249,Wrestling
Austin,0.7128035422259249,How-to



Data for: project1_part22_MiamiFt_Lauderdale_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
Miami-Ft. Lauderdale,0.7101039507467781,Soccer
Miami-Ft. Lauderdale,0.7101039507467781,Bus./financial
Miami-Ft. Lauderdale,0.7101039507467781,Arts/crafts
Miami-Ft. Lauderdale,0.7101039507467781,Standup
Miami-Ft. Lauderdale,0.7101039507467781,Tennis
Miami-Ft. Lauderdale,0.7101039507467781,Mystery
Miami-Ft. Lauderdale,0.7101039507467781,Fantasy
Miami-Ft. Lauderdale,0.7101039507467781,Performing arts
Miami-Ft. Lauderdale,0.7101039507467781,Swimming
Miami-Ft. Lauderdale,0.7101039507467781,Sports non-event



Data for: project1_part22_SeattleTacoma_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
Seattle-Tacoma,0.7080045191318095,Children
Seattle-Tacoma,0.7080045191318095,Outdoors
Seattle-Tacoma,0.7080045191318095,Aviation
Seattle-Tacoma,0.7080045191318095,Computers
Seattle-Tacoma,0.7080045191318095,Agriculture
Seattle-Tacoma,0.7080045191318095,Exercise
Seattle-Tacoma,0.7080045191318095,Beach soccer
Seattle-Tacoma,0.7080045191318095,Beach volleyball
Seattle-Tacoma,0.7080045191318095,Docudrama
Seattle-Tacoma,0.7080045191318095,Music



Top genres for DMA Seattle-Tacoma (Wealth Score: 0.7080045191318095):
['Children', 'Outdoors', 'Aviation', 'Computers', 'Agriculture', 'Exercise', 'Beach soccer', 'Beach volleyball', 'Docudrama', 'Music', 'Gaming']

Data for: project1_part22_Detroit_324075548_326172756.csv


DMA NAME,WEALTH SCORE,ORDERED LIST OF GENRES
Detroit,0.6779135713715153,Bowling
Detroit,0.6779135713715153,History
Detroit,0.6779135713715153,Horse
Detroit,0.6779135713715153,Military
Detroit,0.6779135713715153,Playoff sports
Detroit,0.6779135713715153,Hunting
Detroit,0.6779135713715153,Rodeo
Detroit,0.6779135713715153,Fishing
Detroit,0.6779135713715153,Volleyball
Detroit,0.6779135713715153,Parade
