# Task I

In [1]:
# Creating the Spark Session
import pyspark
from pyspark import SparkContext, SparkConf, SQLContext
from pyspark.sql import SparkSession


spark = SparkSession.builder \
    .master("local[*]") \
    .appName("GenericAppName") \
    .getOrCreate()

#Access SparkContext from your SparkSession
print("APP Name :"+ spark.sparkContext.appName);
print("Master :"+ spark.sparkContext.master);

22/11/29 02:38:39 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.
APP Name :PySparkShell
Master :local[*]


This loads the data from all the files into one spark dataframe

In [2]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
# These are all the databases per year we will analyze.
files = ["players_15", "players_16", "players_17", "players_18",
         "players_19", "players_20", "players_21", "players_22"]
"dataproc-staging-us-east1-358350887320-z748jysx/archive"
# Reading the .csv files and adding year column for each corresponding database year.
df = spark.read.csv("./archive/"+files[0]+".csv", header=True, inferSchema= True).withColumn("year", lit(2015))
for i in range(1,len(files)):
    df = df.union(spark.read.csv("./archive/"+files[i]+".csv", header=True, inferSchema= True).withColumn("year", lit(2015+i)))

# Adding id column with unique id for every entry. 
# This makes it so the dataframe has only one partition, added to avoid very large numbers for the ids.
df = df.coalesce(1) 
df = df.withColumn("id", monotonically_increasing_id())

# I decided to add the columns before ingesting into postgres so they algo get ingested.

The dataframe is ingested into postgres and then read back to spark in the next cell

In [3]:
db_properties={}
#update your db username
db_properties['username']="postgres"
#update your db password
db_properties['password']="bigdata"
#make sure you got the right port number here
db_properties['url']= "jdbc:postgresql://localhost:5432/postgres"
#make sure you had the Postgres JAR file in the right location
db_properties['driver']="org.postgresql.Driver"
# Using table in fifa schema.
db_properties['table']= "fifa.Players"

# Data ingestion into postgres table
df.write.format("jdbc")\
.mode("overwrite")\
.option("url", db_properties['url'])\
.option("dbtable", db_properties['table'])\
.option("user", db_properties['username'])\
.option("password", db_properties['password'])\
.option("Driver", db_properties['driver'])\
.save()


22/11/29 02:38:45 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


                                                                                

In [4]:
# Reading database back from postgres.
df_read = sqlContext.read.format("jdbc")\
    .option("url", db_properties['url'])\
    .option("dbtable", db_properties['table'])\
    .option("user", db_properties['username'])\
    .option("password", db_properties['password'])\
    .option("Driver", db_properties['driver'])\
    .load()


Checking the table schema and looking at the id to make sure it is unique 

In [5]:
# Table schema
df_read.printSchema()

root
 |-- sofifa_id: integer (nullable = true)
 |-- player_url: string (nullable = true)
 |-- short_name: string (nullable = true)
 |-- long_name: string (nullable = true)
 |-- player_positions: string (nullable = true)
 |-- overall: integer (nullable = true)
 |-- potential: integer (nullable = true)
 |-- value_eur: double (nullable = true)
 |-- wage_eur: double (nullable = true)
 |-- age: integer (nullable = true)
 |-- dob: timestamp (nullable = true)
 |-- height_cm: integer (nullable = true)
 |-- weight_kg: integer (nullable = true)
 |-- club_team_id: double (nullable = true)
 |-- club_name: string (nullable = true)
 |-- league_name: string (nullable = true)
 |-- league_level: integer (nullable = true)
 |-- club_position: string (nullable = true)
 |-- club_jersey_number: integer (nullable = true)
 |-- club_loaned_from: string (nullable = true)
 |-- club_joined: timestamp (nullable = true)
 |-- club_contract_valid_until: integer (nullable = true)
 |-- nationality_id: integer (nullable

In [6]:
# Shows id column has unique values.
print(df_read.count())
print(df_read.select('id').distinct().count())

142079
142079


In [7]:
# Shows 5 entries.
df_read.show(5, vertical=True)

[Stage 26:>                                                         (0 + 1) / 1]

-RECORD 0-------------------------------------------
 sofifa_id                   | 158023               
 player_url                  | https://sofifa.co... 
 short_name                  | L. Messi             
 long_name                   | Lionel Andrés Mes... 
 player_positions            | CF                   
 overall                     | 93                   
 potential                   | 95                   
 value_eur                   | 1.005E8              
 wage_eur                    | 550000.0             
 age                         | 27                   
 dob                         | 1987-06-24 00:00:00  
 height_cm                   | 169                  
 weight_kg                   | 67                   
 club_team_id                | 241.0                
 club_name                   | FC Barcelona         
 league_name                 | Spain Primera Div... 
 league_level                | 1                    
 club_position               | CF             

                                                                                

# Task II

In [8]:
def player_contracts(df, x):
    # Function prints the x clubs that have the most players with contracts ending in 2023
    df = df.filter((col("year") == 2022) & (col("club_contract_valid_until") == 2023))\
    .groupBy('club_name').count().sort(col('count').desc())
    df.withColumnRenamed('club_name', 'club').withColumnRenamed('count', 'contracts ending 2023').show(x)


In [9]:
player_contracts(df_read, 7)

+--------------------+---------------------+
|                club|contracts ending 2023|
+--------------------+---------------------+
|En Avant de Guingamp|                   19|
| Club Atlético Lanús|                   17|
|       Lechia Gdańsk|                   17|
|            Barnsley|                   16|
|        Kasimpaşa SK|                   16|
|        Bengaluru FC|                   16|
|              Al Tai|                   15|
+--------------------+---------------------+
only showing top 7 rows



In [10]:
def players_over_27(df, y):
    # Function prints the y clubs that have the most players aged over 27 averaged over the years.
    df = df.filter((col("age") > 27) & (col('club_name') != "")).groupBy('club_name', 'year').count()\
        .groupBy('club_name').avg('count').sort(col('avg(count)').desc()) # Calculates the average over the years.
    df.withColumnRenamed('club_name', 'club').withColumnRenamed('avg(count)', 'average players over 27') \
    .show(y)

In [11]:
players_over_27(df_read, 6)

+--------------------+-----------------------+
|                club|average players over 27|
+--------------------+-----------------------+
| Matsumoto Yamaga FC|                   19.0|
|  Dorados de Sinaloa|                   19.0|
| Shanghai Shenhua FC|                   18.5|
|          Qingdao FC|                   18.0|
|Club Deportivo Jo...|                   17.5|
|         Guaireña FC|                   17.0|
+--------------------+-----------------------+
only showing top 6 rows



In [12]:
def most_frequent_position(df):
    # Function prints the most frequent position across national teams every year.
    years = df.select('year').distinct().collect()
    position = []
    for i in range(len(years)):
        position_count = df.groupBy('nation_position','year').count().filter(col("year") == years[i][0]).sort(col('count').desc())
        position.append(position_count.head(2)[1]) # Got the second result as the most common value is null for every year.
    out = spark.createDataFrame(position, schema=position_count.schema)
    out.show()

In [13]:
most_frequent_position(df_read)

+---------------+----+-----+
|nation_position|year|count|
+---------------+----+-----+
|            SUB|2018|  600|
|            SUB|2015|  564|
|            SUB|2022|  396|
|            SUB|2019|  576|
|            SUB|2020|  588|
|            SUB|2016|  511|
|            SUB|2017|  564|
|            SUB|2021|  588|
+---------------+----+-----+



## Task III

Before doing feature engineering, we look at the data and determine what needs to be done

In [14]:
df_read.printSchema() # checking that the schema is correct for each column comparing to the data in it.

root
 |-- sofifa_id: integer (nullable = true)
 |-- player_url: string (nullable = true)
 |-- short_name: string (nullable = true)
 |-- long_name: string (nullable = true)
 |-- player_positions: string (nullable = true)
 |-- overall: integer (nullable = true)
 |-- potential: integer (nullable = true)
 |-- value_eur: double (nullable = true)
 |-- wage_eur: double (nullable = true)
 |-- age: integer (nullable = true)
 |-- dob: timestamp (nullable = true)
 |-- height_cm: integer (nullable = true)
 |-- weight_kg: integer (nullable = true)
 |-- club_team_id: double (nullable = true)
 |-- club_name: string (nullable = true)
 |-- league_name: string (nullable = true)
 |-- league_level: integer (nullable = true)
 |-- club_position: string (nullable = true)
 |-- club_jersey_number: integer (nullable = true)
 |-- club_loaned_from: string (nullable = true)
 |-- club_joined: timestamp (nullable = true)
 |-- club_contract_valid_until: integer (nullable = true)
 |-- nationality_id: integer (nullable

In [15]:
df_read.show(4, vertical=True)

-RECORD 0-------------------------------------------
 sofifa_id                   | 158023               
 player_url                  | https://sofifa.co... 
 short_name                  | L. Messi             
 long_name                   | Lionel Andrés Mes... 
 player_positions            | CF                   
 overall                     | 93                   
 potential                   | 95                   
 value_eur                   | 1.005E8              
 wage_eur                    | 550000.0             
 age                         | 27                   
 dob                         | 1987-06-24 00:00:00  
 height_cm                   | 169                  
 weight_kg                   | 67                   
 club_team_id                | 241.0                
 club_name                   | FC Barcelona         
 league_name                 | Spain Primera Div... 
 league_level                | 1                    
 club_position               | CF             

[Stage 66:>                                                         (0 + 1) / 1]                                                                                

Columns that need casting:

dob - string

club_team_id - integer

club_joined - string

nation_team_id - integer

mentality_composure - number

release_clause_eur - number

ls, st, rs, lw, lf, cf, rf, rw, lam, cam, ram, lm, lcm, cm, rcm, rm, lwb, ldm, cdm, rdm, rwb, lb, lcb, cb, rcb, rb, gk - number

In [16]:
# Did some initial casting before the actual pipeline
# in order to be able to see correlation between columns properly and look for null values

casted_types_df = (df_read.withColumn("dob", df_read.dob.cast("String")) \
                    .withColumn("club_team_id", df_read.club_team_id.cast("Integer")) \
                    .withColumn("club_joined", df_read.club_joined.cast("String")) \
                    .withColumn("nation_team_id", df_read.nation_team_id.cast("Integer")) \
                    .withColumn("mentality_composure", df_read.mentality_composure.cast("Integer"))
                  )


In [17]:
casted_types_df.describe().show(vertical=True)

                                                                                

-RECORD 0-------------------------------------------
 summary                     | count                
 sofifa_id                   | 142079               
 player_url                  | 142079               
 short_name                  | 142079               
 long_name                   | 142079               
 player_positions            | 142079               
 overall                     | 142079               
 potential                   | 142079               
 value_eur                   | 140182               
 wage_eur                    | 140457               
 age                         | 142079               
 dob                         | 142079               
 height_cm                   | 142079               
 weight_kg                   | 142079               
 club_team_id                | 140449               
 club_name                   | 140449               
 league_name                 | 140449               
 league_level                | 140064         

Create a correlation matrix and check for columns with more than 80% correlation between them

In [18]:
correlation_matrix = casted_types_df.toPandas().corr()

                                                                                

In [19]:
correlation_matrix[correlation_matrix.iloc[20].abs() >= 0.8]

Unnamed: 0,sofifa_id,overall,potential,value_eur,wage_eur,age,height_cm,weight_kg,club_team_id,league_level,...,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,goalkeeping_speed,year,id
passing,-0.307851,0.677281,0.456374,0.40113,0.424744,0.330951,-0.272348,-0.180681,-0.127026,-0.174327,...,0.120986,0.093444,0.02372,0.032269,0.029677,0.023152,0.026857,,0.036093,-0.047691
dribbling,-0.16662,0.60041,0.487096,0.389402,0.389674,0.159526,-0.394867,-0.298686,-0.125699,-0.157314,...,-0.204077,-0.219788,0.010061,0.017674,0.021164,0.008366,0.014209,,0.051217,-0.022019
attacking_crossing,-0.132488,0.396761,0.266526,0.227232,0.253765,0.131272,-0.478272,-0.393012,-0.06702,-0.081738,...,0.413222,0.397107,-0.651215,-0.649375,-0.648342,-0.648888,-0.650954,0.27858,-0.006102,-0.055601
attacking_short_passing,-0.122802,0.497018,0.383729,0.296031,0.314753,0.134887,-0.356334,-0.291827,-0.085158,-0.11621,...,0.522544,0.489599,-0.719706,-0.717546,-0.714398,-0.715072,-0.718967,0.29038,0.034201,-0.02738
skill_long_passing,-0.169145,0.480906,0.341384,0.275719,0.301308,0.181861,-0.322111,-0.260164,-0.082233,-0.113195,...,0.572509,0.547333,-0.589799,-0.587297,-0.584105,-0.585223,-0.588864,0.245466,0.021389,-0.038811
skill_ball_control,-0.097512,0.456417,0.369219,0.275703,0.293757,0.08546,-0.409164,-0.339398,-0.089144,-0.095546,...,0.399255,0.366503,-0.780761,-0.77865,-0.775539,-0.77617,-0.779829,0.37007,0.014349,-0.041757
mentality_vision,-0.17118,0.48919,0.358897,0.321083,0.326438,0.186315,-0.366693,-0.290833,-0.097182,-0.111277,...,0.151052,0.116723,-0.411272,-0.405395,-0.403184,-0.404067,-0.409367,0.045656,0.062335,0.001023


In [20]:
correlation_matrix.iloc[21]

sofifa_id                 -0.166620
overall                    0.600410
potential                  0.487096
value_eur                  0.389402
wage_eur                   0.389674
                             ...   
goalkeeping_positioning    0.008366
goalkeeping_reflexes       0.014209
goalkeeping_speed               NaN
year                       0.051217
id                        -0.022019
Name: dribbling, Length: 61, dtype: float64

In [21]:
# List of columns with correlation greater than 80% to another, these will be dropped.

correlated_columns = ['defending_marking_awareness', 'defending_standing_tackle', 'defending_sliding_tackle', 
 'mentality_interceptions', 'movement_acceleration', 'movement_sprint_speed', 
 'attacking_finishing', 'attacking_volleys', 'power_shot_power', 'power_long_shots', 
 'mentality_positioning', 'dribbling', 'attacking_crossing', 'attacking_short_passing', 
 'skill_long_passing', 'skill_ball_control', 'power_strength', 'skill_curve', 'goalkeeping_diving', 
 'goalkeeping_handling', 'goalkeping_kicking', 'goalkeeping_positioning', 'ls', 'st', 'rs', 'lam', 'cam', 
 'ram', 'lm', 'cm', 'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb', 'lw', 
 'lf', 'rf', 'rw']

In [22]:
# Count number of null values in each column. Columns with over 50% of null rows will be dropped.
from pyspark.sql.functions import *

null_counts_df = casted_types_df.select([count(when(isnan(c) | col(c).isNull(), c)).alias(c) \
                        for c in casted_types_df.columns])

null_counts_df.show(truncate=False, vertical=True)

[Stage 71:>                                                         (0 + 1) / 1]

-RECORD 0-----------------------------
 sofifa_id                   | 0      
 player_url                  | 0      
 short_name                  | 0      
 long_name                   | 0      
 player_positions            | 0      
 overall                     | 0      
 potential                   | 0      
 value_eur                   | 1897   
 wage_eur                    | 1622   
 age                         | 0      
 dob                         | 0      
 height_cm                   | 0      
 weight_kg                   | 0      
 club_team_id                | 1630   
 club_name                   | 1630   
 league_name                 | 1630   
 league_level                | 2015   
 club_position               | 1630   
 club_jersey_number          | 1630   
 club_loaned_from            | 133774 
 club_joined                 | 9935   
 club_contract_valid_until   | 1630   
 nationality_id              | 0      
 nationality_name            | 0      
 nation_team_id          

                                                                                

In [23]:
casted_types_df.count()//2

71039

### Columns that will be dropped:

Columns that have over 50% null values (71039) will be dropped:

    - club_loaned_from
    - nation_team_id
    - nation_position
    - nation_jersey_number
    - player_tags
    - player_traits
    - goalkeeping_speed
    - nation_logo_url

Columns with url will be dropped, as they aren't relevant for the player's overall score:

    - player_url
    - player_face_url
    - club_logo_url
    - club_flag_url
    - nation_flag_url


Other columns to be dropped due to not being relevant for the target outcome:

    - sofifa_id
    - short_name
    - long_name
    - dob (age column contains similar information)
    - real_face
    - club_jersey_number
    - player_positions (conversion to one hot encoded is difficult)

Columns to be dropped because an id/encoded column for that feature already exists:

    - club_name
    - nationality_name

# Task 3

First, the data and feature engineering pipeline is defined according to the previous data exploration.

In [24]:
from pyspark.ml import Pipeline,Transformer
from pyspark.ml.feature import Imputer,StandardScaler,StringIndexer,OneHotEncoder, VectorAssembler

import numpy as np

col_names = ['sofifa_id', 'player_url', 'short_name', 'long_name', 'player_positions',
             'overall', 'potential', 'value_eur', 'wage_eur', 'age', 'dob', 'height_cm',
             'weight_kg', 'club_team_id', 'club_name', 'league_name', 'league_level',
             'club_position', 'club_jersey_number', 'club_loaned_from', 'club_joined',
             'club_contract_valid_until', 'nationality_id', 'nationality_name', 'nation_team_id',
             'nation_position', 'nation_jersey_number', 'preferred_foot', 'weak_foot',
             'skill_moves', 'international_reputation', 'work_rate', 'body_type', 'real_face',
             'release_clause_eur', 'player_tags', 'player_traits', 'pace', 'shooting', 'passing',
             'dribbling', 'defending', 'physic', 'attacking_crossing', 'attacking_finishing',
             'attacking_heading_accuracy', 'attacking_short_passing', 'attacking_volleys',
             'skill_dribbling', 'skill_curve', 'skill_fk_accuracy', 'skill_long_passing',
             'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed',
             'movement_agility', 'movement_reactions', 'movement_balance', 'power_shot_power',
             'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots',
             'mentality_aggression', 'mentality_interceptions', 'mentality_positioning',
             'mentality_vision', 'mentality_penalties', 'mentality_composure',
             'defending_marking_awareness', 'defending_standing_tackle', 'defending_sliding_tackle',
             'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking',
             'goalkeeping_positioning', 'goalkeeping_reflexes', 'goalkeeping_speed', 'ls',
             'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram', 'lm', 'lcm', 'cm',
             'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb',
             'gk', 'player_face_url', 'club_logo_url', 'club_flag_url', 'nation_logo_url',
             'nation_flag_url', 'year', 'id']

nominal_cols = ['league_name', 'club_position','preferred_foot',
                'work_rate', 'body_type', 'nationality_name']

continuous_cols = ['potential', 'value_eur', 'wage_eur', 'age',
       'height_cm', 'weight_kg', 'league_level', 
       'club_contract_valid_until', 'weak_foot', 'skill_moves',
       'international_reputation', 'pace', 'shooting', 'passing', 'dribbling',
       'defending', 'physic', 'attacking_crossing', 'attacking_finishing',
       'attacking_heading_accuracy', 'attacking_short_passing',
       'attacking_volleys', 'skill_dribbling', 'skill_curve',
       'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control',
       'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
       'movement_reactions', 'movement_balance', 'power_shot_power',
       'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots',
       'mentality_aggression', 'mentality_interceptions',
       'mentality_positioning', 'mentality_vision', 'mentality_penalties',
       'mentality_composure', 'defending_marking_awareness',
       'defending_standing_tackle', 'defending_sliding_tackle',
       'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking',
       'goalkeeping_positioning', 'goalkeeping_reflexes',
       'cf', 'lcm', 'gk']

sum_cols = ['cf', 'lcm', 'gk']

string_cols = ['ls', 'st', 'rs', 'lw', 'lf', 'rf', 'rw', 'lam', 'cam', 'ram', 'lm', 'cm',
             'rcm', 'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb']

cols_to_remove = ['dob', 'player_positions', 'club_joined', 'club_loaned_from', 'nation_team_id',
                  'nation_position', 'nation_jersey_number', 'player_tags', 'player_traits',
                  'goalkeeping_speed', 'nation_logo_url', 'player_url', 'player_face_url',
                  'club_logo_url', 'club_flag_url', 'nation_flag_url', 'sofifa_id', 'short_name',
                  'long_name', 'real_face', 'club_jersey_number', 'club_team_id', 'nationality_id',
                  'club_name', 'release_clause_eur']

class OutcomeCreater(Transformer): # this defines a transformer that creates the outcome column
    
    def __init__(self):
        super().__init__()

    def _transform(self, dataset):
        output_df = dataset.withColumn('outcome', col('overall')).drop("overall")  
        # output_df = output_df.withColumn('outcome', col('outcome').cast(DoubleType()))
        return output_df

def sum_to_int(str):
    out=0
    for i,c in enumerate(str):
        if c == '+':
            out = int(str[:i]) + int(str[i+1:])
    return out


class FeatureTypeCaster(Transformer): # this transformer will cast the columns as appropriate types  
    def __init__(self):
        super().__init__()

    def _transform(self, dataset):
        output_df = dataset

        to_int = udf(lambda x: sum_to_int(x),IntegerType())
        for col_name in sum_cols:
            output_df = output_df.withColumn(col_name,to_int(col(col_name)))
        for col_name in (continuous_cols):
            output_df = output_df.withColumn(col_name,col(col_name).cast(DoubleType()))
        
        # This simplifies the body_type feature to only the 4 body type labels (unique, stocky, lean, normal)
        output_df = output_df.withColumn("body_type", split(output_df.body_type, ' ').getItem(0))
        return output_df

    
class FillNa(Transformer): # this defines a transformer that fills null values with number for imputing
    
    def __init__(self, columns_to_be_imputed = None):
        super().__init__()
        self.columns_to_be_imputed=columns_to_be_imputed
    def _transform(self, dataset):
        output_df = dataset.fillna(-200, self.columns_to_be_imputed)
        return output_df
    

class NaDropper(Transformer): # this defines a transformer that drops rows with null values
    
    def __init__(self, cols):
        self.cols = cols
        super().__init__()
    def _transform(self, dataset):
        output_df = dataset.na.drop(subset = self.cols)
        return output_df
    
    
class ColumnDropper(Transformer): # this transformer drops unnecessary columns
    def __init__(self, columns_to_drop = None):
        super().__init__()
        self.columns_to_drop=columns_to_drop
    def _transform(self, dataset):
        output_df = dataset
        for col_name in self.columns_to_drop:
            output_df = output_df.drop(col_name)
        return output_df

def get_preprocess_pipeline():
    # Stage where columns are casted as appropriate types
    stage_typecaster = FeatureTypeCaster()

    cols_to_be_imputed = ['value_eur', 'wage_eur',
       'club_contract_valid_until', 'pace', 'shooting', 'passing', 'dribbling',
       'defending', 'physic', 'mentality_composure']
#     imputed_cols = [x+"_imputed" for x in cols_to_be_imputed]
    
#     value_not_in_dataset = -200
#     stage_fillna = FillNa(columns_to_be_imputed = cols_to_be_imputed)
#     stage_imputer = Imputer (
#             inputCols=cols_to_be_imputed,
#             outputCols=imputed_cols,
#             strategy = "median",
#             missingValue = value_not_in_dataset)
    
#     stage_column_dropper_1 = ColumnDropper(columns_to_drop = cols_to_remove)
    stage_na_handler1 = NaDropper(nominal_cols)
    
    # Stage where nominal columns are transformed to index columns using StringIndexer
    nominal_id_cols = [x+"_index" for x in nominal_cols]
    nominal_onehot_cols = [x+"_encoded" for x in nominal_cols]
    stage_nominal_indexer = StringIndexer(inputCols = nominal_cols, outputCols = nominal_id_cols )

    # Stage where the index columns are further transformed using OneHotEncoder
    stage_nominal_onehot_encoder = OneHotEncoder(inputCols=nominal_id_cols, outputCols=nominal_onehot_cols)
    

    # Stage where all relevant features are assembled into a vector (and dropping a few)
    feature_cols = (continuous_cols +cols_to_be_imputed + nominal_onehot_cols)
    
    correlated_cols_to_remove = ['defending_marking_awareness', 'defending_standing_tackle', 
                    'defending_sliding_tackle', 'mentality_interceptions', 'movement_acceleration', 
                    'movement_sprint_speed', 'attacking_finishing', 'attacking_volleys', 
                    'power_shot_power', 'power_long_shots', 'mentality_positioning', 
                    'attacking_crossing', 'attacking_short_passing', 'skill_long_passing', 
                    'skill_ball_control', 'power_strength', 'skill_curve', 'goalkeeping_diving', 
                    'goalkeeping_handling', 'goalkeeping_kicking', 'goalkeeping_positioning']
    for col_name in (correlated_cols_to_remove+cols_to_be_imputed):
        feature_cols.remove(col_name)
    
    stage_na_handler2 = NaDropper(feature_cols)

    stage_vector_assembler = VectorAssembler(inputCols=feature_cols, outputCol="vectorized_features")

    # Stage where we scale the columns
    stage_scaler = StandardScaler(inputCol= 'vectorized_features', outputCol= 'features')
    

    # Stage for creating the outcome column representing whether there is attack 
    stage_outcome = OutcomeCreater()

    # Removing all unnecessary columns, only keeping the 'features' and 'outcome' columns
    stage_column_dropper = ColumnDropper(columns_to_drop = cols_to_remove+nominal_cols+nominal_id_cols+
        nominal_onehot_cols + cols_to_be_imputed+continuous_cols+string_cols+
        ['vectorized_features', 'year', 'id'])
    # Connect the columns into a pipeline
    pipeline = Pipeline(stages=[stage_typecaster, stage_na_handler1,
                                stage_nominal_indexer,stage_nominal_onehot_encoder,
        stage_na_handler2, stage_vector_assembler,stage_scaler,stage_outcome,stage_column_dropper])
    return pipeline 

Run the data through the pipeline to obtain the feature vectors and target columns.

In [25]:
preprocess_pipeline = get_preprocess_pipeline()
preprocess_pipeline_model = preprocess_pipeline.fit(df_read)

fifa_df = preprocess_pipeline_model.transform(df_read)
# fifa_df = fifa_df.repartition(30)

# Creating the training, validation and test data sets, 60% training data
# The remaining 40% split between validation and test data
fifa_df_train, fifa_df_validate, fifa_df_test = fifa_df.randomSplit([0.6, 0.2, 0.2], 14813)

# Convert the dataframes to pandas in order to be used later on with Tensorflow
to_array = udf(lambda v: v.toArray().tolist(), ArrayType(FloatType()))

fifa_df_train_pandas = fifa_df_train.withColumn('features', to_array('features')).toPandas()
fifa_df_validate_pandas = fifa_df_validate.withColumn('features', to_array('features')).toPandas()
fifa_df_test_pandas = fifa_df_test.withColumn('features', to_array('features')).toPandas()

                                                                                

## Spark ML: Linear Regression and Random Forest Regression

In [26]:
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator

# The evaluator to be used with both Spark ML models.
evaluator = RegressionEvaluator(
    labelCol="outcome", predictionCol="prediction", metricName="mse")

Create the linear regression model and train it before doing cross validation

In [27]:
from pyspark.ml.regression import LinearRegression

lr = LinearRegression(featuresCol = 'features', labelCol = 'outcome', predictionCol = 'prediction',
                      maxIter = 100, regParam = 0.05)
lr_model = lr.fit(fifa_df_train)



[Stage 86:>                                                         (0 + 1) / 1]

22/11/29 02:41:19 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
22/11/29 02:41:19 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.ForeignLinkerBLAS
22/11/29 02:41:19 WARN InstanceBuilder$JavaBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.VectorBLAS
22/11/29 02:41:19 WARN InstanceBuilder$NativeLAPACK: Failed to load implementation from:dev.ludovic.netlib.lapack.JNILAPACK


                                                                                

In [28]:
lr_prediction_train = lr_model.transform(fifa_df_train)
lr_prediction_test = lr_model.transform(fifa_df_test)

lr_mse_train = evaluator.evaluate(lr_prediction_train)
lr_mse_test = evaluator.evaluate(lr_prediction_test)


print(f"Train MSE = {np.round(lr_mse_train,5)}, test MSE = {np.round(lr_mse_test,5)}")


[Stage 89:>                                                         (0 + 1) / 1]

Train MSE = 2.73777, test MSE = 2.83255


                                                                                

Create the cross validation linear regression model and train it, then compare the loss before and after cross-validation.

In [29]:
lr_paramGrid = (ParamGridBuilder()
             .addGrid(lr.maxIter, [100, 200, 500])# maximum number of iterations
             .addGrid(lr.regParam,[0.0001 ,0.05, 0.1])# Regularization parameter
             .build())

lr_cv = CrossValidator(estimator=lr, estimatorParamMaps=lr_paramGrid, 
                    evaluator=evaluator, numFolds=5, parallelism = 4)

lr_cv_model = lr_cv.fit(fifa_df_train)

lr_cv_prediction_test = lr_cv_model.transform(fifa_df_test)
lr_cv_mse = evaluator.evaluate(lr_cv_prediction_test)


                                                                                

In [30]:
print(f"Before cross-validation and parameter tuning, MSE={np.round(lr_mse_test,5)}")
print(f"After cross-validation and parameter tuning, MSE={np.round(lr_cv_mse,5)}")

Before cross-validation and parameter tuning, MSE=2.83255
After cross-validation and parameter tuning, MSE=2.80868


Repeat process above using random forest regression model.

In [31]:
from pyspark.ml.regression import RandomForestRegressor

rf = RandomForestRegressor(featuresCol = 'features', labelCol = 'outcome')
rf_model = rf.fit(fifa_df_train)

                                                                                

In [32]:
rf_prediction_train = rf_model.transform(fifa_df_train)
rf_prediction_test = rf_model.transform(fifa_df_test)

rf_mse_train = evaluator.evaluate(rf_prediction_train)
rf_mse_test = evaluator.evaluate(rf_prediction_test)


print(f"Train MSE = {np.round(rf_mse_train,5)}, test MSE = {np.round(rf_mse_test,5)}")


[Stage 243:>                                                        (0 + 1) / 1]

Train MSE = 2.25615, test MSE = 2.31273


                                                                                

In [33]:
rf_paramGrid = (ParamGridBuilder()
             .addGrid(rf.maxDepth, [3, 10, 20])# maximum depth for each tree
             .addGrid(rf.minInfoGain,[0.0, 1.0, 5.0])# minimum info gained
             .build())

rf_cv = CrossValidator(estimator=rf, estimatorParamMaps=rf_paramGrid, 
                    evaluator=evaluator, numFolds=5, parallelism = 4)

rf_cv_model = rf_cv.fit(fifa_df_train)

rf_cv_prediction_test = rf_cv_model.transform(fifa_df_test)
rf_cv_mse = evaluator.evaluate(rf_cv_prediction_test)


                                                                                

22/11/29 02:42:36 WARN DAGScheduler: Broadcasting large task binary with size 1139.1 KiB


                                                                                

22/11/29 02:42:38 WARN DAGScheduler: Broadcasting large task binary with size 2019.4 KiB


                                                                                

22/11/29 02:42:41 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB


                                                                                

22/11/29 02:42:44 WARN DAGScheduler: Broadcasting large task binary with size 1139.1 KiB
22/11/29 02:42:45 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB
22/11/29 02:42:46 WARN DAGScheduler: Broadcasting large task binary with size 2019.4 KiB


[Stage 373:>                (0 + 1) / 1][Stage 383:>                (0 + 1) / 1]                                                                                

22/11/29 02:42:49 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:42:52 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB


                                                                                

22/11/29 02:42:55 WARN DAGScheduler: Broadcasting large task binary with size 10.7 MiB


[Stage 422:>                                                        (0 + 1) / 1]

22/11/29 02:42:58 WARN DAGScheduler: Broadcasting large task binary with size 1321.2 KiB


                                                                                

22/11/29 02:43:01 WARN DAGScheduler: Broadcasting large task binary with size 16.5 MiB


[Stage 424:>                                                        (0 + 1) / 1]

22/11/29 02:43:04 WARN DAGScheduler: Broadcasting large task binary with size 1876.7 KiB


                                                                                

22/11/29 02:43:08 WARN DAGScheduler: Broadcasting large task binary with size 22.3 MiB


[Stage 426:>                                                        (0 + 1) / 1]

22/11/29 02:43:11 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:43:16 WARN DAGScheduler: Broadcasting large task binary with size 26.1 MiB


[Stage 428:>                                                        (0 + 1) / 1]

22/11/29 02:43:19 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:43:24 WARN DAGScheduler: Broadcasting large task binary with size 31.9 MiB


[Stage 430:>                                                        (0 + 1) / 1]

22/11/29 02:43:27 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:43:32 WARN DAGScheduler: Broadcasting large task binary with size 39.4 MiB


[Stage 432:>                                                        (0 + 1) / 1]

22/11/29 02:43:35 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


[Stage 434:>                                                        (0 + 0) / 1]

22/11/29 02:43:40 WARN DAGScheduler: Broadcasting large task binary with size 45.3 MiB


[Stage 434:>                                                        (0 + 1) / 1]

22/11/29 02:43:43 WARN DAGScheduler: Broadcasting large task binary with size 2.1 MiB


[Stage 436:>                                                        (0 + 0) / 1]

22/11/29 02:43:48 WARN DAGScheduler: Broadcasting large task binary with size 49.0 MiB


[Stage 436:>                                                        (0 + 1) / 1]

22/11/29 02:43:51 WARN DAGScheduler: Broadcasting large task binary with size 1763.5 KiB


[Stage 438:>                                                        (0 + 0) / 1]

22/11/29 02:43:56 WARN DAGScheduler: Broadcasting large task binary with size 51.6 MiB


[Stage 438:>                                                        (0 + 1) / 1]

22/11/29 02:43:59 WARN DAGScheduler: Broadcasting large task binary with size 1347.3 KiB


[Stage 440:>                                                        (0 + 0) / 1]

22/11/29 02:44:03 WARN DAGScheduler: Broadcasting large task binary with size 53.4 MiB


                                                                                

22/11/29 02:44:08 WARN DAGScheduler: Broadcasting large task binary with size 10.6 MiB


                                                                                

22/11/29 02:44:09 WARN DAGScheduler: Broadcasting large task binary with size 10.9 MiB
22/11/29 02:44:10 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB
22/11/29 02:44:11 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB


                                                                                

22/11/29 02:44:22 WARN DAGScheduler: Broadcasting large task binary with size 1139.9 KiB


                                                                                

22/11/29 02:44:24 WARN DAGScheduler: Broadcasting large task binary with size 2026.7 KiB


                                                                                

22/11/29 02:44:27 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:44:28 WARN DAGScheduler: Broadcasting large task binary with size 1139.9 KiB
22/11/29 02:44:29 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB
22/11/29 02:44:29 WARN DAGScheduler: Broadcasting large task binary with size 2026.7 KiB
22/11/29 02:44:31 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:44:33 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB


                                                                                

22/11/29 02:44:36 WARN DAGScheduler: Broadcasting large task binary with size 10.6 MiB


[Stage 629:>                                                        (0 + 1) / 1]

22/11/29 02:44:37 WARN DAGScheduler: Broadcasting large task binary with size 1313.4 KiB


                                                                                

22/11/29 02:44:39 WARN DAGScheduler: Broadcasting large task binary with size 16.5 MiB


[Stage 631:>                                                        (0 + 1) / 1]

22/11/29 02:44:41 WARN DAGScheduler: Broadcasting large task binary with size 1876.7 KiB


                                                                                

22/11/29 02:44:43 WARN DAGScheduler: Broadcasting large task binary with size 22.3 MiB


[Stage 633:>                                                        (0 + 1) / 1]

22/11/29 02:44:46 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:44:48 WARN DAGScheduler: Broadcasting large task binary with size 26.3 MiB


[Stage 635:>                                                        (0 + 1) / 1]

22/11/29 02:44:50 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:44:53 WARN DAGScheduler: Broadcasting large task binary with size 32.0 MiB


[Stage 637:>                                                        (0 + 1) / 1]

22/11/29 02:44:55 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:44:58 WARN DAGScheduler: Broadcasting large task binary with size 38.6 MiB


[Stage 639:>                                                        (0 + 1) / 1]

22/11/29 02:45:01 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:45:04 WARN DAGScheduler: Broadcasting large task binary with size 45.5 MiB


[Stage 641:>                                                        (0 + 1) / 1]

22/11/29 02:45:07 WARN DAGScheduler: Broadcasting large task binary with size 2.1 MiB


                                                                                

22/11/29 02:45:10 WARN DAGScheduler: Broadcasting large task binary with size 49.2 MiB


[Stage 643:>                                                        (0 + 1) / 1]

22/11/29 02:45:12 WARN DAGScheduler: Broadcasting large task binary with size 1779.0 KiB


[Stage 645:>                                                        (0 + 0) / 1]

22/11/29 02:45:15 WARN DAGScheduler: Broadcasting large task binary with size 51.7 MiB


[Stage 645:>                                                        (0 + 1) / 1]

22/11/29 02:45:17 WARN DAGScheduler: Broadcasting large task binary with size 1334.8 KiB


                                                                                

22/11/29 02:45:20 WARN DAGScheduler: Broadcasting large task binary with size 53.4 MiB


                                                                                

22/11/29 02:45:23 WARN DAGScheduler: Broadcasting large task binary with size 10.7 MiB
22/11/29 02:45:23 WARN DAGScheduler: Broadcasting large task binary with size 11.0 MiB
22/11/29 02:45:24 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
22/11/29 02:45:24 WARN DAGScheduler: Broadcasting large task binary with size 5.5 MiB


                                                                                

22/11/29 02:45:31 WARN DAGScheduler: Broadcasting large task binary with size 1140.1 KiB
22/11/29 02:45:32 WARN DAGScheduler: Broadcasting large task binary with size 2023.9 KiB
22/11/29 02:45:33 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:45:34 WARN DAGScheduler: Broadcasting large task binary with size 1140.1 KiB
22/11/29 02:45:35 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB
22/11/29 02:45:35 WARN DAGScheduler: Broadcasting large task binary with size 2023.9 KiB
22/11/29 02:45:37 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:45:39 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB


                                                                                

22/11/29 02:45:41 WARN DAGScheduler: Broadcasting large task binary with size 10.7 MiB


[Stage 836:>                                                        (0 + 1) / 1]

22/11/29 02:45:43 WARN DAGScheduler: Broadcasting large task binary with size 1328.0 KiB


                                                                                

22/11/29 02:45:45 WARN DAGScheduler: Broadcasting large task binary with size 16.6 MiB


[Stage 838:>                                                        (0 + 1) / 1]

22/11/29 02:45:47 WARN DAGScheduler: Broadcasting large task binary with size 1906.0 KiB


                                                                                

22/11/29 02:45:49 WARN DAGScheduler: Broadcasting large task binary with size 22.3 MiB


[Stage 840:>                                                        (0 + 1) / 1]

22/11/29 02:45:51 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:45:54 WARN DAGScheduler: Broadcasting large task binary with size 26.3 MiB


[Stage 842:>                                                        (0 + 1) / 1]

22/11/29 02:45:56 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:45:59 WARN DAGScheduler: Broadcasting large task binary with size 31.9 MiB


[Stage 844:>                                                        (0 + 1) / 1]

22/11/29 02:46:01 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:46:04 WARN DAGScheduler: Broadcasting large task binary with size 38.6 MiB


[Stage 846:>                                                        (0 + 1) / 1]

22/11/29 02:46:06 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:46:10 WARN DAGScheduler: Broadcasting large task binary with size 45.3 MiB


[Stage 848:>                                                        (0 + 1) / 1]

22/11/29 02:46:12 WARN DAGScheduler: Broadcasting large task binary with size 2.1 MiB


                                                                                

22/11/29 02:46:15 WARN DAGScheduler: Broadcasting large task binary with size 48.9 MiB


[Stage 850:>                                                        (0 + 1) / 1]

22/11/29 02:46:18 WARN DAGScheduler: Broadcasting large task binary with size 1756.8 KiB


                                                                                

22/11/29 02:46:21 WARN DAGScheduler: Broadcasting large task binary with size 51.5 MiB


[Stage 852:>                                                        (0 + 1) / 1]

22/11/29 02:46:23 WARN DAGScheduler: Broadcasting large task binary with size 1335.6 KiB


                                                                                

22/11/29 02:46:26 WARN DAGScheduler: Broadcasting large task binary with size 53.2 MiB


                                                                                

22/11/29 02:46:29 WARN DAGScheduler: Broadcasting large task binary with size 10.6 MiB
22/11/29 02:46:30 WARN DAGScheduler: Broadcasting large task binary with size 10.9 MiB
22/11/29 02:46:30 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
22/11/29 02:46:31 WARN DAGScheduler: Broadcasting large task binary with size 5.6 MiB


                                                                                

22/11/29 02:46:37 WARN DAGScheduler: Broadcasting large task binary with size 1139.0 KiB
22/11/29 02:46:38 WARN DAGScheduler: Broadcasting large task binary with size 2017.0 KiB
22/11/29 02:46:39 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:46:40 WARN DAGScheduler: Broadcasting large task binary with size 1139.0 KiB
22/11/29 02:46:41 WARN DAGScheduler: Broadcasting large task binary with size 2017.0 KiB
22/11/29 02:46:42 WARN DAGScheduler: Broadcasting large task binary with size 6.3 MiB
22/11/29 02:46:43 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:46:45 WARN DAGScheduler: Broadcasting large task binary with size 6.3 MiB


                                                                                

22/11/29 02:46:48 WARN DAGScheduler: Broadcasting large task binary with size 10.6 MiB


[Stage 1043:>                                                       (0 + 1) / 1]

22/11/29 02:46:49 WARN DAGScheduler: Broadcasting large task binary with size 1321.4 KiB


                                                                                

22/11/29 02:46:51 WARN DAGScheduler: Broadcasting large task binary with size 16.5 MiB


[Stage 1045:>                                                       (0 + 1) / 1]

22/11/29 02:46:53 WARN DAGScheduler: Broadcasting large task binary with size 1885.3 KiB


                                                                                

22/11/29 02:46:56 WARN DAGScheduler: Broadcasting large task binary with size 22.2 MiB


[Stage 1047:>                                                       (0 + 1) / 1]

22/11/29 02:46:58 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:47:01 WARN DAGScheduler: Broadcasting large task binary with size 26.1 MiB


[Stage 1049:>                                                       (0 + 1) / 1]

22/11/29 02:47:03 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:47:06 WARN DAGScheduler: Broadcasting large task binary with size 31.8 MiB


[Stage 1051:>                                                       (0 + 1) / 1]

22/11/29 02:47:08 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:47:11 WARN DAGScheduler: Broadcasting large task binary with size 38.4 MiB


[Stage 1053:>                                                       (0 + 1) / 1]

22/11/29 02:47:13 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:47:16 WARN DAGScheduler: Broadcasting large task binary with size 45.2 MiB


[Stage 1055:>                                                       (0 + 1) / 1]

22/11/29 02:47:18 WARN DAGScheduler: Broadcasting large task binary with size 2.1 MiB


                                                                                

22/11/29 02:47:22 WARN DAGScheduler: Broadcasting large task binary with size 48.8 MiB


[Stage 1057:>                                                       (0 + 1) / 1]

22/11/29 02:47:24 WARN DAGScheduler: Broadcasting large task binary with size 1739.6 KiB


                                                                                

22/11/29 02:47:27 WARN DAGScheduler: Broadcasting large task binary with size 51.3 MiB


[Stage 1059:>                                                       (0 + 1) / 1]

22/11/29 02:47:29 WARN DAGScheduler: Broadcasting large task binary with size 1318.1 KiB


[Stage 1061:>                                                       (0 + 0) / 1]

22/11/29 02:47:32 WARN DAGScheduler: Broadcasting large task binary with size 53.0 MiB


                                                                                

22/11/29 02:47:34 WARN DAGScheduler: Broadcasting large task binary with size 10.5 MiB
22/11/29 02:47:35 WARN DAGScheduler: Broadcasting large task binary with size 10.8 MiB
22/11/29 02:47:36 WARN DAGScheduler: Broadcasting large task binary with size 8.1 MiB
22/11/29 02:47:36 WARN DAGScheduler: Broadcasting large task binary with size 5.4 MiB


                                                                                

22/11/29 02:47:43 WARN DAGScheduler: Broadcasting large task binary with size 1139.7 KiB
22/11/29 02:47:44 WARN DAGScheduler: Broadcasting large task binary with size 2017.4 KiB
22/11/29 02:47:45 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:47:46 WARN DAGScheduler: Broadcasting large task binary with size 1139.7 KiB
22/11/29 02:47:47 WARN DAGScheduler: Broadcasting large task binary with size 2017.4 KiB
22/11/29 02:47:48 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB
22/11/29 02:47:49 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB
22/11/29 02:47:51 WARN DAGScheduler: Broadcasting large task binary with size 6.4 MiB


                                                                                

22/11/29 02:47:53 WARN DAGScheduler: Broadcasting large task binary with size 10.6 MiB


[Stage 1248:>                                                       (0 + 1) / 1]

22/11/29 02:47:55 WARN DAGScheduler: Broadcasting large task binary with size 1320.8 KiB


                                                                                

22/11/29 02:47:57 WARN DAGScheduler: Broadcasting large task binary with size 16.5 MiB


[Stage 1250:>                                                       (0 + 1) / 1]

22/11/29 02:47:59 WARN DAGScheduler: Broadcasting large task binary with size 1875.8 KiB


                                                                                

22/11/29 02:48:01 WARN DAGScheduler: Broadcasting large task binary with size 22.1 MiB


[Stage 1252:>                                                       (0 + 1) / 1]

22/11/29 02:48:04 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:48:06 WARN DAGScheduler: Broadcasting large task binary with size 26.0 MiB


[Stage 1254:>                                                       (0 + 1) / 1]

22/11/29 02:48:09 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:48:12 WARN DAGScheduler: Broadcasting large task binary with size 31.7 MiB


[Stage 1256:>                                                       (0 + 1) / 1]

22/11/29 02:48:13 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:48:17 WARN DAGScheduler: Broadcasting large task binary with size 38.2 MiB


[Stage 1258:>                                                       (0 + 1) / 1]

22/11/29 02:48:19 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:48:22 WARN DAGScheduler: Broadcasting large task binary with size 45.1 MiB


[Stage 1260:>                                                       (0 + 1) / 1]

22/11/29 02:48:24 WARN DAGScheduler: Broadcasting large task binary with size 2.1 MiB


                                                                                

22/11/29 02:48:28 WARN DAGScheduler: Broadcasting large task binary with size 48.9 MiB


[Stage 1262:>                                                       (0 + 1) / 1]

22/11/29 02:48:30 WARN DAGScheduler: Broadcasting large task binary with size 1790.2 KiB


                                                                                

22/11/29 02:48:33 WARN DAGScheduler: Broadcasting large task binary with size 51.5 MiB


[Stage 1264:>                                                       (0 + 1) / 1]

22/11/29 02:48:35 WARN DAGScheduler: Broadcasting large task binary with size 1345.6 KiB


                                                                                

22/11/29 02:48:38 WARN DAGScheduler: Broadcasting large task binary with size 53.2 MiB


                                                                                

22/11/29 02:48:41 WARN DAGScheduler: Broadcasting large task binary with size 10.7 MiB
22/11/29 02:48:42 WARN DAGScheduler: Broadcasting large task binary with size 11.0 MiB
22/11/29 02:48:42 WARN DAGScheduler: Broadcasting large task binary with size 8.3 MiB
22/11/29 02:48:43 WARN DAGScheduler: Broadcasting large task binary with size 5.6 MiB


                                                                                

22/11/29 02:48:52 WARN DAGScheduler: Broadcasting large task binary with size 1124.6 KiB


                                                                                

22/11/29 02:48:53 WARN DAGScheduler: Broadcasting large task binary with size 2013.4 KiB


                                                                                

22/11/29 02:48:55 WARN DAGScheduler: Broadcasting large task binary with size 3.6 MiB


                                                                                

22/11/29 02:48:57 WARN DAGScheduler: Broadcasting large task binary with size 6.5 MiB


                                                                                

22/11/29 02:48:59 WARN DAGScheduler: Broadcasting large task binary with size 11.1 MiB


[Stage 1301:>                                                       (0 + 1) / 1]

22/11/29 02:49:01 WARN DAGScheduler: Broadcasting large task binary with size 1406.3 KiB


                                                                                

22/11/29 02:49:03 WARN DAGScheduler: Broadcasting large task binary with size 17.6 MiB


[Stage 1303:>                                                       (0 + 1) / 1]

22/11/29 02:49:05 WARN DAGScheduler: Broadcasting large task binary with size 2.0 MiB


                                                                                

22/11/29 02:49:08 WARN DAGScheduler: Broadcasting large task binary with size 21.6 MiB


[Stage 1305:>                                                       (0 + 1) / 1]

22/11/29 02:49:10 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:49:13 WARN DAGScheduler: Broadcasting large task binary with size 24.5 MiB


[Stage 1307:>                                                       (0 + 1) / 1]

22/11/29 02:49:15 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:49:18 WARN DAGScheduler: Broadcasting large task binary with size 30.2 MiB


[Stage 1309:>                                                       (0 + 1) / 1]

22/11/29 02:49:20 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:49:23 WARN DAGScheduler: Broadcasting large task binary with size 36.8 MiB


[Stage 1311:>                                                       (0 + 1) / 1]

22/11/29 02:49:25 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:49:29 WARN DAGScheduler: Broadcasting large task binary with size 44.2 MiB


[Stage 1313:>                                                       (0 + 1) / 1]

22/11/29 02:49:31 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


                                                                                

22/11/29 02:49:35 WARN DAGScheduler: Broadcasting large task binary with size 51.8 MiB


[Stage 1315:>                                                       (0 + 1) / 1]

22/11/29 02:49:37 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB


[Stage 1317:>                                                       (0 + 0) / 1]

22/11/29 02:49:41 WARN DAGScheduler: Broadcasting large task binary with size 56.4 MiB


[Stage 1317:>                                                       (0 + 1) / 1]

22/11/29 02:49:43 WARN DAGScheduler: Broadcasting large task binary with size 1997.7 KiB


                                                                                

22/11/29 02:49:47 WARN DAGScheduler: Broadcasting large task binary with size 59.9 MiB


[Stage 1319:>                                                       (0 + 1) / 1]

22/11/29 02:49:49 WARN DAGScheduler: Broadcasting large task binary with size 1654.5 KiB


                                                                                

22/11/29 02:49:51 WARN DAGScheduler: Broadcasting large task binary with size 19.2 MiB


                                                                                

22/11/29 02:49:53 WARN DAGScheduler: Broadcasting large task binary with size 20.5 MiB


                                                                                

22/11/29 02:49:55 WARN DAGScheduler: Broadcasting large task binary with size 18.4 MiB


                                                                                

22/11/29 02:49:57 WARN DAGScheduler: Broadcasting large task binary with size 12.8 MiB
22/11/29 02:49:58 WARN DAGScheduler: Broadcasting large task binary with size 13.2 MiB
22/11/29 02:49:58 WARN DAGScheduler: Broadcasting large task binary with size 3.4 MiB


                                                                                

In [34]:
print(f"Before cross-validation and parameter tuning, MSE={np.round(rf_mse_test,5)}")
print(f"After cross-validation and parameter tuning, MSE={np.round(rf_cv_mse,5)}")

Before cross-validation and parameter tuning, MSE=2.31273
After cross-validation and parameter tuning, MSE=0.58248


# Tensorflow Shallow and deep neural network regression

First, the pandas dataframes are converted to tensors

In [35]:
import tensorflow as tf
from tensorflow import keras 

# Converting the pandas DataFrame to tensors

x_train = tf.constant(np.array(fifa_df_train_pandas['features'].values.tolist()))
y_train = tf.constant(np.array(fifa_df_train_pandas['outcome'].values.tolist()))

x_validate = tf.constant(np.array(fifa_df_validate_pandas['features'].values.tolist()))
y_validate = tf.constant(np.array(fifa_df_validate_pandas['outcome'].values.tolist()))


x_test = tf.constant(np.array(fifa_df_test_pandas['features'].values.tolist()))
y_test = tf.constant(np.array(fifa_df_test_pandas['outcome'].values.tolist()))


Metal device set to: Apple M1


2022-11-29 02:50:05.267673: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-11-29 02:50:05.267972: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


First model is a shallow linear regression model, this is the closest to the linear regression in Spark ML.

In [36]:
lr_model = keras.Sequential( [
                            keras.layers.Dense(1)
                            ] )


y_pred = lr_model(x_train)
lr_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (57921, 1)                313       
                                                                 
Total params: 313
Trainable params: 313
Non-trainable params: 0
_________________________________________________________________


Compile and train the model before hyperparameter tuning.

In [37]:
# Compile the model
lr_model.compile(optimizer = 'adam',
    loss=tf.keras.losses.MeanSquaredError(), 
    metrics=[tf.keras.metrics.MeanSquaredError()])

In [38]:
lr_model.fit(x_train,y_train, epochs = 10,validation_data=(x_validate,y_validate),verbose = 2)

2022-11-29 02:50:05.980821: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/10


2022-11-29 02:50:06.365751: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 02:50:11.645479: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1811/1811 - 7s - loss: 56.5972 - mean_squared_error: 56.5972 - val_loss: 13.3480 - val_mean_squared_error: 13.3480 - 7s/epoch - 4ms/step
Epoch 2/10
1811/1811 - 6s - loss: 9.8283 - mean_squared_error: 9.8283 - val_loss: 8.4778 - val_mean_squared_error: 8.4778 - 6s/epoch - 3ms/step
Epoch 3/10
1811/1811 - 6s - loss: 6.6885 - mean_squared_error: 6.6885 - val_loss: 6.3918 - val_mean_squared_error: 6.3918 - 6s/epoch - 3ms/step
Epoch 4/10
1811/1811 - 6s - loss: 5.3980 - mean_squared_error: 5.3980 - val_loss: 4.9601 - val_mean_squared_error: 4.9601 - 6s/epoch - 3ms/step
Epoch 5/10
1811/1811 - 6s - loss: 4.8241 - mean_squared_error: 4.8241 - val_loss: 4.5157 - val_mean_squared_error: 4.5157 - 6s/epoch - 3ms/step
Epoch 6/10
1811/1811 - 6s - loss: 4.4702 - mean_squared_error: 4.4702 - val_loss: 4.2919 - val_mean_squared_error: 4.2919 - 6s/epoch - 3ms/step
Epoch 7/10
1811/1811 - 6s - loss: 4.2250 - mean_squared_error: 4.2250 - val_loss: 4.8437 - val_mean_squared_error: 4.8437 - 6s/epoch - 3ms/step

<keras.callbacks.History at 0x2900a21d0>

Hyperparameter tuning with tensorboard.

In [39]:
from tensorboard.plugins.hparams import api as hp

# Defining the hyperparameters that will be compared and tuned using tensorboard

HP_LEARNING_RATE = hp.HParam('learning_rate', hp.Discrete([0.001,0.005,0.01, 0.1])) # the learning rate

# Creating the logs
with tf.summary.create_file_writer('logs/lr_model').as_default():
    hp.hparams_config(
        hparams=[HP_LEARNING_RATE],
        metrics=[hp.Metric('MSE')],
    )



In [40]:
# Defining the model training function for hyperparameter tuning
def train_test_model(hparams,logdir, x, y, i, k):

    model = keras.Sequential()
    model.add(keras.layers.Dense(1))
    model.compile(
        optimizer = tf.optimizers.Adam(learning_rate=hparams[HP_LEARNING_RATE]),
        loss = tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.MeanSquaredError(name = 'MSE_Epochs')])
    
  # This section is for splitting the data sets into k folds. May be better to do this outside this function
    x_train = tf.split(x, k)
    y_train = tf.split(y, k)
    val_x = x_train.pop(i)
    val_y = y_train.pop(i)
    x_train = tf.concat([x_train[0], x_train[1]], 0)  
    y_train = tf.concat([y_train[0], y_train[1]], 0)  

  # Store the output of the model training
    history = model.fit(x_train, y_train, epochs=20, verbose = 2,
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)],
    validation_data = (val_x, val_y))

    mse = np.min(history.history["val_MSE_Epochs"]) # Returns the lowest Mean Squared Error obtained in training
    return mse


In [41]:
# Combine the original training and validation data sets
# x_train = tf.concat([x_train,x_validate], 0)
# y_train = tf.concat([y_train,y_validate], 0)

# Get randomized indices in order to shuffle data sets
indices = tf.range(start=0, limit=tf.shape(x_train)[0], dtype=tf.int32)

shuffled_indices = tf.random.shuffle(indices)

# Shuffle the data sets with the random indices
x_train_shuffled = tf.gather(x_train, shuffled_indices)
y_train_shuffled = tf.gather(y_train, shuffled_indices)
print(tf.shape(x_train_shuffled))

# Number of folds to split the data into
k = 3

tf.Tensor([57921   312], shape=(2,), dtype=int32)


In [42]:
# Nested loops for hyperparameter tuning, iterating over all the hyperparameters and combinations
for hp_lr in HP_LEARNING_RATE.domain.values:
    hparams = {
        HP_LEARNING_RATE: hp_lr,
    }
    run_name = f"run-LEARNING_RATE {(hparams[HP_LEARNING_RATE])}"
    print('--- Starting trial: %s' % run_name)
    print({h.name: hparams[h] for h in hparams})

    run_dir = 'logs/lr_model/' + run_name
        
    # The model is trained with each of the k folds in the dataset and mse is averaged over the k runs.
    mse = 0
    for i in range(k):
        mse += train_test_model(hparams, run_dir, x_train_shuffled, y_train_shuffled, i, k)
    mse = mse / k

    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        tf.summary.scalar("MSE", mse, step=1)


--- Starting trial: run-LEARNING_RATE 0.001
{'learning_rate': 0.001}
Epoch 1/20


2022-11-29 02:51:08.346639: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 02:51:11.876827: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 297.1802 - MSE_Epochs: 297.1802 - val_loss: 22.5944 - val_MSE_Epochs: 22.5944 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 5s - loss: 15.8330 - MSE_Epochs: 15.8330 - val_loss: 11.7084 - val_MSE_Epochs: 11.7084 - 5s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 9.4497 - MSE_Epochs: 9.4497 - val_loss: 8.5031 - val_MSE_Epochs: 8.5031 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 7.4793 - MSE_Epochs: 7.4793 - val_loss: 7.1913 - val_MSE_Epochs: 7.1913 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 6.4668 - MSE_Epochs: 6.4668 - val_loss: 6.4443 - val_MSE_Epochs: 6.4443 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 5.7016 - MSE_Epochs: 5.7016 - val_loss: 5.9266 - val_MSE_Epochs: 5.9266 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 5.2056 - MSE_Epochs: 5.2056 - val_loss: 5.3059 - val_MSE_Epochs: 5.3059 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 4.8446 - MSE_Epochs: 4.8446 - val_loss: 5.0835 - val_MSE_Epochs: 5.0835

2022-11-29 02:52:37.963035: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 02:52:41.368091: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 300.1273 - MSE_Epochs: 300.1273 - val_loss: 22.0078 - val_MSE_Epochs: 22.0078 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 16.0259 - MSE_Epochs: 16.0259 - val_loss: 11.4809 - val_MSE_Epochs: 11.4809 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 9.5815 - MSE_Epochs: 9.5815 - val_loss: 8.3225 - val_MSE_Epochs: 8.3225 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 7.5704 - MSE_Epochs: 7.5704 - val_loss: 7.0231 - val_MSE_Epochs: 7.0231 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 6.5384 - MSE_Epochs: 6.5384 - val_loss: 6.1020 - val_MSE_Epochs: 6.1020 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 5.8141 - MSE_Epochs: 5.8141 - val_loss: 5.4988 - val_MSE_Epochs: 5.4988 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 5.2960 - MSE_Epochs: 5.2960 - val_loss: 5.0392 - val_MSE_Epochs: 5.0392 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 4.9490 - MSE_Epochs: 4.9490 - val_loss: 4.8311 - val_MSE_Epochs: 4.8311

2022-11-29 02:54:06.913915: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 02:54:10.419299: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 301.6843 - MSE_Epochs: 301.6843 - val_loss: 22.8367 - val_MSE_Epochs: 22.8367 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 16.3973 - MSE_Epochs: 16.3973 - val_loss: 11.7325 - val_MSE_Epochs: 11.7325 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 9.6801 - MSE_Epochs: 9.6801 - val_loss: 8.2128 - val_MSE_Epochs: 8.2128 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 7.6135 - MSE_Epochs: 7.6135 - val_loss: 6.8609 - val_MSE_Epochs: 6.8609 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 5s - loss: 6.5976 - MSE_Epochs: 6.5976 - val_loss: 6.4202 - val_MSE_Epochs: 6.4202 - 5s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 5.8404 - MSE_Epochs: 5.8404 - val_loss: 5.6778 - val_MSE_Epochs: 5.6778 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 5.3119 - MSE_Epochs: 5.3119 - val_loss: 4.9659 - val_MSE_Epochs: 4.9659 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 4.9871 - MSE_Epochs: 4.9871 - val_loss: 5.3590 - val_MSE_Epochs: 5.3590

2022-11-29 02:55:35.495493: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 02:55:38.905170: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 71.6100 - MSE_Epochs: 71.6100 - val_loss: 8.1378 - val_MSE_Epochs: 8.1378 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 5s - loss: 6.9144 - MSE_Epochs: 6.9144 - val_loss: 6.2521 - val_MSE_Epochs: 6.2521 - 5s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 5s - loss: 5.6391 - MSE_Epochs: 5.6391 - val_loss: 5.3759 - val_MSE_Epochs: 5.3759 - 5s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 5s - loss: 5.2943 - MSE_Epochs: 5.2943 - val_loss: 4.8816 - val_MSE_Epochs: 4.8816 - 5s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 4.7703 - MSE_Epochs: 4.7703 - val_loss: 4.3794 - val_MSE_Epochs: 4.3794 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 4.5046 - MSE_Epochs: 4.5046 - val_loss: 4.2641 - val_MSE_Epochs: 4.2641 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 4.3941 - MSE_Epochs: 4.3941 - val_loss: 3.9925 - val_MSE_Epochs: 3.9925 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 4.0925 - MSE_Epochs: 4.0925 - val_loss: 3.7984 - val_MSE_Epochs: 3.7984 - 4s/ep

2022-11-29 02:57:04.356138: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 02:57:07.776636: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 71.9217 - MSE_Epochs: 71.9217 - val_loss: 8.0327 - val_MSE_Epochs: 8.0327 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 7.0006 - MSE_Epochs: 7.0006 - val_loss: 6.4270 - val_MSE_Epochs: 6.4270 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 5.7663 - MSE_Epochs: 5.7663 - val_loss: 6.2200 - val_MSE_Epochs: 6.2200 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 5.2504 - MSE_Epochs: 5.2504 - val_loss: 5.0079 - val_MSE_Epochs: 5.0079 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 4.9293 - MSE_Epochs: 4.9293 - val_loss: 4.3671 - val_MSE_Epochs: 4.3671 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 4.6423 - MSE_Epochs: 4.6423 - val_loss: 4.6663 - val_MSE_Epochs: 4.6663 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 4.2984 - MSE_Epochs: 4.2984 - val_loss: 3.9290 - val_MSE_Epochs: 3.9290 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 4.2508 - MSE_Epochs: 4.2508 - val_loss: 3.8093 - val_MSE_Epochs: 3.8093 - 4s/ep

2022-11-29 02:58:32.718401: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 02:58:36.077124: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 71.7587 - MSE_Epochs: 71.7587 - val_loss: 8.0072 - val_MSE_Epochs: 8.0072 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 7.0036 - MSE_Epochs: 7.0036 - val_loss: 5.9017 - val_MSE_Epochs: 5.9017 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 5.8984 - MSE_Epochs: 5.8984 - val_loss: 5.0655 - val_MSE_Epochs: 5.0655 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 5.3133 - MSE_Epochs: 5.3133 - val_loss: 4.5268 - val_MSE_Epochs: 4.5268 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 4.9444 - MSE_Epochs: 4.9444 - val_loss: 4.2691 - val_MSE_Epochs: 4.2691 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 4.7256 - MSE_Epochs: 4.7256 - val_loss: 3.9558 - val_MSE_Epochs: 3.9558 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 4.4024 - MSE_Epochs: 4.4024 - val_loss: 3.9073 - val_MSE_Epochs: 3.9073 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 5s - loss: 4.1453 - MSE_Epochs: 4.1453 - val_loss: 5.1942 - val_MSE_Epochs: 5.1942 - 5s/ep

2022-11-29 03:00:01.593936: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:00:05.014988: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 49.2082 - MSE_Epochs: 49.2082 - val_loss: 7.3922 - val_MSE_Epochs: 7.3922 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 6.6799 - MSE_Epochs: 6.6799 - val_loss: 5.8637 - val_MSE_Epochs: 5.8637 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 5s - loss: 5.9559 - MSE_Epochs: 5.9559 - val_loss: 5.4390 - val_MSE_Epochs: 5.4390 - 5s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 5.7558 - MSE_Epochs: 5.7558 - val_loss: 5.1738 - val_MSE_Epochs: 5.1738 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 5.2774 - MSE_Epochs: 5.2774 - val_loss: 4.5765 - val_MSE_Epochs: 4.5765 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 5.2630 - MSE_Epochs: 5.2630 - val_loss: 5.6832 - val_MSE_Epochs: 5.6832 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 4.9166 - MSE_Epochs: 4.9166 - val_loss: 5.7555 - val_MSE_Epochs: 5.7555 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 5.0656 - MSE_Epochs: 5.0656 - val_loss: 3.9799 - val_MSE_Epochs: 3.9799 - 4s/ep

2022-11-29 03:01:30.610293: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:01:34.558801: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 49.4555 - MSE_Epochs: 49.4555 - val_loss: 7.2394 - val_MSE_Epochs: 7.2394 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 6.5906 - MSE_Epochs: 6.5906 - val_loss: 5.6384 - val_MSE_Epochs: 5.6384 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 6.1308 - MSE_Epochs: 6.1308 - val_loss: 5.0724 - val_MSE_Epochs: 5.0724 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 5.8186 - MSE_Epochs: 5.8186 - val_loss: 4.6050 - val_MSE_Epochs: 4.6050 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 5.3421 - MSE_Epochs: 5.3421 - val_loss: 4.3460 - val_MSE_Epochs: 4.3460 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 5.2836 - MSE_Epochs: 5.2836 - val_loss: 6.3473 - val_MSE_Epochs: 6.3473 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 4.9718 - MSE_Epochs: 4.9718 - val_loss: 4.0528 - val_MSE_Epochs: 4.0528 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 4.8022 - MSE_Epochs: 4.8022 - val_loss: 3.8721 - val_MSE_Epochs: 3.8721 - 4s/ep

2022-11-29 03:02:59.847901: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:03:03.249286: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 50.5194 - MSE_Epochs: 50.5194 - val_loss: 7.0844 - val_MSE_Epochs: 7.0844 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 5s - loss: 6.7836 - MSE_Epochs: 6.7836 - val_loss: 6.1948 - val_MSE_Epochs: 6.1948 - 5s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 6.1827 - MSE_Epochs: 6.1827 - val_loss: 7.4158 - val_MSE_Epochs: 7.4158 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 5.8796 - MSE_Epochs: 5.8796 - val_loss: 7.2750 - val_MSE_Epochs: 7.2750 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 5.3266 - MSE_Epochs: 5.3266 - val_loss: 4.5280 - val_MSE_Epochs: 4.5280 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 5.3853 - MSE_Epochs: 5.3853 - val_loss: 4.1072 - val_MSE_Epochs: 4.1072 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 5.2189 - MSE_Epochs: 5.2189 - val_loss: 3.9806 - val_MSE_Epochs: 3.9806 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 4.8665 - MSE_Epochs: 4.8665 - val_loss: 4.0244 - val_MSE_Epochs: 4.0244 - 4s/ep

2022-11-29 03:04:28.938955: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:04:32.354079: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 109.1263 - MSE_Epochs: 109.1263 - val_loss: 210.7771 - val_MSE_Epochs: 210.7771 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 148.9168 - MSE_Epochs: 148.9168 - val_loss: 53.8502 - val_MSE_Epochs: 53.8502 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 82.8776 - MSE_Epochs: 82.8776 - val_loss: 54.5977 - val_MSE_Epochs: 54.5977 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 110.5459 - MSE_Epochs: 110.5459 - val_loss: 71.4522 - val_MSE_Epochs: 71.4522 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 118.3130 - MSE_Epochs: 118.3130 - val_loss: 162.7916 - val_MSE_Epochs: 162.7916 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 5s - loss: 103.6031 - MSE_Epochs: 103.6031 - val_loss: 105.2660 - val_MSE_Epochs: 105.2660 - 5s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 5s - loss: 113.0077 - MSE_Epochs: 113.0077 - val_loss: 55.2855 - val_MSE_Epochs: 55.2855 - 5s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 110.8471 - MSE_Epochs: 110.8471 - v

2022-11-29 03:05:58.519929: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:06:01.896817: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 119.0438 - MSE_Epochs: 119.0438 - val_loss: 114.3902 - val_MSE_Epochs: 114.3902 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 140.7776 - MSE_Epochs: 140.7776 - val_loss: 61.5736 - val_MSE_Epochs: 61.5736 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 96.8588 - MSE_Epochs: 96.8588 - val_loss: 91.8092 - val_MSE_Epochs: 91.8092 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 106.2336 - MSE_Epochs: 106.2336 - val_loss: 128.7503 - val_MSE_Epochs: 128.7503 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 109.4490 - MSE_Epochs: 109.4490 - val_loss: 66.4996 - val_MSE_Epochs: 66.4996 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 126.7754 - MSE_Epochs: 126.7754 - val_loss: 78.8256 - val_MSE_Epochs: 78.8256 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 107.5203 - MSE_Epochs: 107.5203 - val_loss: 53.9505 - val_MSE_Epochs: 53.9505 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 113.5381 - MSE_Epochs: 113.5381 - val

2022-11-29 03:07:27.159873: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:07:30.543990: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 5s - loss: 119.4956 - MSE_Epochs: 119.4956 - val_loss: 265.0672 - val_MSE_Epochs: 265.0672 - 5s/epoch - 4ms/step
Epoch 2/20
1207/1207 - 4s - loss: 180.4942 - MSE_Epochs: 180.4942 - val_loss: 32.9948 - val_MSE_Epochs: 32.9948 - 4s/epoch - 4ms/step
Epoch 3/20
1207/1207 - 4s - loss: 104.1910 - MSE_Epochs: 104.1910 - val_loss: 56.2804 - val_MSE_Epochs: 56.2804 - 4s/epoch - 4ms/step
Epoch 4/20
1207/1207 - 4s - loss: 108.5898 - MSE_Epochs: 108.5898 - val_loss: 55.6074 - val_MSE_Epochs: 55.6074 - 4s/epoch - 4ms/step
Epoch 5/20
1207/1207 - 4s - loss: 145.0056 - MSE_Epochs: 145.0056 - val_loss: 99.1517 - val_MSE_Epochs: 99.1517 - 4s/epoch - 4ms/step
Epoch 6/20
1207/1207 - 4s - loss: 132.1889 - MSE_Epochs: 132.1889 - val_loss: 71.4790 - val_MSE_Epochs: 71.4790 - 4s/epoch - 4ms/step
Epoch 7/20
1207/1207 - 4s - loss: 146.1517 - MSE_Epochs: 146.1517 - val_loss: 63.7451 - val_MSE_Epochs: 63.7451 - 4s/epoch - 4ms/step
Epoch 8/20
1207/1207 - 4s - loss: 117.9649 - MSE_Epochs: 117.9649 - val

Repeat the previous process with a deep neural network, first defining the layers, then with hyperparameter tuning.

In [43]:
# Defining the model with 5 hidden layers and the output layer for 5 classes

deep_model = keras.Sequential( [keras.layers.Dense(20,activation='relu'),
                                keras.layers.Dense(20,activation='relu'),
                                keras.layers.Dense(20,activation='relu'),
                                keras.layers.Dense(20,activation='relu') ,
                                keras.layers.Dense(1)] ) 


y_pred = deep_model(x_train)
deep_model.summary()


Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_13 (Dense)            (57921, 20)               6260      
                                                                 
 dense_14 (Dense)            (57921, 20)               420       
                                                                 
 dense_15 (Dense)            (57921, 20)               420       
                                                                 
 dense_16 (Dense)            (57921, 20)               420       
                                                                 
 dense_17 (Dense)            (57921, 1)                21        
                                                                 
Total params: 7,541
Trainable params: 7,541
Non-trainable params: 0
_________________________________________________________________


In [44]:
# Compile the model
deep_model.compile(optimizer = 'adam',
    loss=tf.keras.losses.MeanSquaredError(), 
    metrics=[tf.keras.metrics.MeanSquaredError()])

In [45]:
deep_model.fit(x_train,y_train, epochs = 10,validation_data=(x_validate,y_validate),verbose = 2)

Epoch 1/10


2022-11-29 03:08:56.203110: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:09:04.435413: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1811/1811 - 10s - loss: 10.2444 - mean_squared_error: 10.2444 - val_loss: 4.9854 - val_mean_squared_error: 4.9854 - 10s/epoch - 5ms/step
Epoch 2/10
1811/1811 - 10s - loss: 4.1522 - mean_squared_error: 4.1522 - val_loss: 3.1724 - val_mean_squared_error: 3.1724 - 10s/epoch - 5ms/step
Epoch 3/10
1811/1811 - 10s - loss: 3.3287 - mean_squared_error: 3.3287 - val_loss: 5.5709 - val_mean_squared_error: 5.5709 - 10s/epoch - 5ms/step
Epoch 4/10
1811/1811 - 10s - loss: 2.9455 - mean_squared_error: 2.9455 - val_loss: 2.6054 - val_mean_squared_error: 2.6054 - 10s/epoch - 5ms/step
Epoch 5/10
1811/1811 - 10s - loss: 2.6960 - mean_squared_error: 2.6960 - val_loss: 3.4817 - val_mean_squared_error: 3.4817 - 10s/epoch - 5ms/step
Epoch 6/10
1811/1811 - 9s - loss: 2.4683 - mean_squared_error: 2.4683 - val_loss: 3.1159 - val_mean_squared_error: 3.1159 - 9s/epoch - 5ms/step
Epoch 7/10
1811/1811 - 9s - loss: 2.4530 - mean_squared_error: 2.4530 - val_loss: 5.0823 - val_mean_squared_error: 5.0823 - 9s/epoch - 

<keras.callbacks.History at 0x34c3d5c90>

In [46]:
HP_WIDTH = hp.HParam('NN_width', hp.Discrete([20, 30, 40])) # the width of the layers
HP_DEPTH = hp.HParam('NN_depth', hp.Discrete([4,6])) # Number of layers/depth

# Creating the logs
with tf.summary.create_file_writer('logs/deep_model').as_default():
    hp.hparams_config(
        hparams=[HP_WIDTH, HP_DEPTH],
        metrics=[hp.Metric('MSE')],
    )

In [47]:
# Defining the model training function for hyperparameter tuning
def train_test_deep_model(hparams,logdir, x, y, i, k):
    model = keras.Sequential()
    for _ in range(hparams[HP_DEPTH]): #Create the model
        model.add(keras.layers.Dense(hparams[HP_WIDTH],activation='relu'))
    model.add(keras.layers.Dense(1))
    model.compile(
        optimizer='adam',
        loss = tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.MeanSquaredError(name = 'MSE_Epochs')])
    
  # This section is for splitting the data sets into k folds. May be better to do this outside this function
    x_train = tf.split(x, k)
    y_train = tf.split(y, k)
    val_x = x_train.pop(i)
    val_y = y_train.pop(i)
    x_train = tf.concat([x_train[0], x_train[1]], 0)  
    y_train = tf.concat([y_train[0], y_train[1]], 0)  

  # Store the output of the model training
    history = model.fit(x_train, y_train, epochs=20, verbose = 2,
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)],
    validation_data = (val_x, val_y))

    mse = np.min(history.history["val_MSE_Epochs"]) # Returns the lowest Mean Squared Error obtained in training
    return mse


In [48]:
# Nested loops for hyperparameter tuning, iterating over all the hyperparameters and combinations
for hp_width in HP_WIDTH.domain.values:
    for hp_depth in (HP_DEPTH.domain.values):
        hparams = {
            HP_WIDTH: hp_width,
            HP_DEPTH: hp_depth,
        }
        run_name = f"run-WIDTH {int(hparams[HP_WIDTH])}-DEPTH {hparams[HP_DEPTH]}"
        print('--- Starting trial: %s' % run_name)
        print({h.name: hparams[h] for h in hparams})

        run_dir = 'logs/deep_model/' + run_name
        
        # The model is trained with each of the k folds in the dataset and accuracy is averaged over the k runs.
        mse = 0
        for i in range(k):
            mse += train_test_deep_model(hparams, run_dir, x_train_shuffled, y_train_shuffled, i, k)
        mse = mse / k

        with tf.summary.create_file_writer(run_dir).as_default():
            hp.hparams(hparams)  # record the values used in this trial
            tf.summary.scalar("MSE", mse, step=1)

    

--- Starting trial: run-WIDTH 20-DEPTH 4
{'NN_width': 20, 'NN_depth': 4}
Epoch 1/20


2022-11-29 03:10:31.979523: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:10:37.461504: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 7s - loss: 12.9467 - MSE_Epochs: 12.9467 - val_loss: 4.8585 - val_MSE_Epochs: 4.8585 - 7s/epoch - 6ms/step
Epoch 2/20
1207/1207 - 7s - loss: 4.7236 - MSE_Epochs: 4.7236 - val_loss: 4.1093 - val_MSE_Epochs: 4.1093 - 7s/epoch - 6ms/step
Epoch 3/20
1207/1207 - 7s - loss: 3.8723 - MSE_Epochs: 3.8723 - val_loss: 3.3208 - val_MSE_Epochs: 3.3208 - 7s/epoch - 6ms/step
Epoch 4/20
1207/1207 - 7s - loss: 3.3843 - MSE_Epochs: 3.3843 - val_loss: 3.1018 - val_MSE_Epochs: 3.1018 - 7s/epoch - 6ms/step
Epoch 5/20
1207/1207 - 7s - loss: 3.1735 - MSE_Epochs: 3.1735 - val_loss: 2.8898 - val_MSE_Epochs: 2.8898 - 7s/epoch - 6ms/step
Epoch 6/20
1207/1207 - 7s - loss: 2.9097 - MSE_Epochs: 2.9097 - val_loss: 2.4666 - val_MSE_Epochs: 2.4666 - 7s/epoch - 6ms/step
Epoch 7/20
1207/1207 - 7s - loss: 2.6611 - MSE_Epochs: 2.6611 - val_loss: 3.6435 - val_MSE_Epochs: 3.6435 - 7s/epoch - 6ms/step
Epoch 8/20
1207/1207 - 7s - loss: 2.5522 - MSE_Epochs: 2.5522 - val_loss: 2.2760 - val_MSE_Epochs: 2.2760 - 7s/ep

2022-11-29 03:12:56.223163: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:13:02.543070: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 8s - loss: 14.4634 - MSE_Epochs: 14.4634 - val_loss: 5.0749 - val_MSE_Epochs: 5.0749 - 8s/epoch - 7ms/step
Epoch 2/20
1207/1207 - 7s - loss: 4.7711 - MSE_Epochs: 4.7711 - val_loss: 3.7670 - val_MSE_Epochs: 3.7670 - 7s/epoch - 6ms/step
Epoch 3/20
1207/1207 - 8s - loss: 4.0102 - MSE_Epochs: 4.0102 - val_loss: 3.1580 - val_MSE_Epochs: 3.1580 - 8s/epoch - 6ms/step
Epoch 4/20
1207/1207 - 7s - loss: 3.4676 - MSE_Epochs: 3.4676 - val_loss: 3.0048 - val_MSE_Epochs: 3.0048 - 7s/epoch - 6ms/step
Epoch 5/20
1207/1207 - 7s - loss: 3.0553 - MSE_Epochs: 3.0553 - val_loss: 4.0193 - val_MSE_Epochs: 4.0193 - 7s/epoch - 6ms/step
Epoch 6/20
1207/1207 - 7s - loss: 2.9926 - MSE_Epochs: 2.9926 - val_loss: 6.5605 - val_MSE_Epochs: 6.5605 - 7s/epoch - 6ms/step
Epoch 7/20
1207/1207 - 8s - loss: 2.6903 - MSE_Epochs: 2.6903 - val_loss: 2.3708 - val_MSE_Epochs: 2.3708 - 8s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 7s - loss: 2.5334 - MSE_Epochs: 2.5334 - val_loss: 2.4158 - val_MSE_Epochs: 2.4158 - 7s/ep

2022-11-29 03:15:23.204662: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:15:28.842032: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 8s - loss: 12.3812 - MSE_Epochs: 12.3812 - val_loss: 4.6733 - val_MSE_Epochs: 4.6733 - 8s/epoch - 7ms/step
Epoch 2/20
1207/1207 - 7s - loss: 4.5126 - MSE_Epochs: 4.5126 - val_loss: 6.0892 - val_MSE_Epochs: 6.0892 - 7s/epoch - 6ms/step
Epoch 3/20
1207/1207 - 7s - loss: 3.8402 - MSE_Epochs: 3.8402 - val_loss: 3.0245 - val_MSE_Epochs: 3.0245 - 7s/epoch - 6ms/step
Epoch 4/20
1207/1207 - 7s - loss: 3.4108 - MSE_Epochs: 3.4108 - val_loss: 2.6809 - val_MSE_Epochs: 2.6809 - 7s/epoch - 6ms/step
Epoch 5/20
1207/1207 - 7s - loss: 3.0259 - MSE_Epochs: 3.0259 - val_loss: 2.6562 - val_MSE_Epochs: 2.6562 - 7s/epoch - 6ms/step
Epoch 6/20
1207/1207 - 7s - loss: 2.7784 - MSE_Epochs: 2.7784 - val_loss: 3.3816 - val_MSE_Epochs: 3.3816 - 7s/epoch - 6ms/step
Epoch 7/20
1207/1207 - 7s - loss: 2.6741 - MSE_Epochs: 2.6741 - val_loss: 2.3671 - val_MSE_Epochs: 2.3671 - 7s/epoch - 6ms/step
Epoch 8/20
1207/1207 - 7s - loss: 2.6375 - MSE_Epochs: 2.6375 - val_loss: 2.2164 - val_MSE_Epochs: 2.2164 - 7s/ep

2022-11-29 03:17:47.340791: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:17:54.089462: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 9s - loss: 26.5769 - MSE_Epochs: 26.5769 - val_loss: 6.4837 - val_MSE_Epochs: 6.4837 - 9s/epoch - 7ms/step
Epoch 2/20
1207/1207 - 8s - loss: 5.5609 - MSE_Epochs: 5.5609 - val_loss: 6.0226 - val_MSE_Epochs: 6.0226 - 8s/epoch - 7ms/step
Epoch 3/20
1207/1207 - 8s - loss: 4.2734 - MSE_Epochs: 4.2734 - val_loss: 4.3369 - val_MSE_Epochs: 4.3369 - 8s/epoch - 7ms/step
Epoch 4/20
1207/1207 - 8s - loss: 3.7590 - MSE_Epochs: 3.7590 - val_loss: 4.1729 - val_MSE_Epochs: 4.1729 - 8s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 8s - loss: 3.3533 - MSE_Epochs: 3.3533 - val_loss: 3.2176 - val_MSE_Epochs: 3.2176 - 8s/epoch - 7ms/step
Epoch 6/20
1207/1207 - 8s - loss: 3.0122 - MSE_Epochs: 3.0122 - val_loss: 2.5461 - val_MSE_Epochs: 2.5461 - 8s/epoch - 7ms/step
Epoch 7/20
1207/1207 - 8s - loss: 2.8403 - MSE_Epochs: 2.8403 - val_loss: 3.0721 - val_MSE_Epochs: 3.0721 - 8s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 9s - loss: 2.7750 - MSE_Epochs: 2.7750 - val_loss: 4.6546 - val_MSE_Epochs: 4.6546 - 9s/ep

2022-11-29 03:21:09.630534: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:21:19.212509: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 12s - loss: 27.2765 - MSE_Epochs: 27.2765 - val_loss: 8.9149 - val_MSE_Epochs: 8.9149 - 12s/epoch - 10ms/step
Epoch 2/20
1207/1207 - 12s - loss: 5.5813 - MSE_Epochs: 5.5813 - val_loss: 4.5633 - val_MSE_Epochs: 4.5633 - 12s/epoch - 10ms/step
Epoch 3/20
1207/1207 - 12s - loss: 4.2121 - MSE_Epochs: 4.2121 - val_loss: 4.1304 - val_MSE_Epochs: 4.1304 - 12s/epoch - 10ms/step
Epoch 4/20
1207/1207 - 11s - loss: 3.8643 - MSE_Epochs: 3.8643 - val_loss: 3.5478 - val_MSE_Epochs: 3.5478 - 11s/epoch - 9ms/step
Epoch 5/20
1207/1207 - 11s - loss: 3.3628 - MSE_Epochs: 3.3628 - val_loss: 2.8410 - val_MSE_Epochs: 2.8410 - 11s/epoch - 9ms/step
Epoch 6/20
1207/1207 - 11s - loss: 3.1036 - MSE_Epochs: 3.1036 - val_loss: 2.6360 - val_MSE_Epochs: 2.6360 - 11s/epoch - 9ms/step
Epoch 7/20
1207/1207 - 11s - loss: 2.7709 - MSE_Epochs: 2.7709 - val_loss: 3.4466 - val_MSE_Epochs: 3.4466 - 11s/epoch - 9ms/step
Epoch 8/20
1207/1207 - 11s - loss: 2.7546 - MSE_Epochs: 2.7546 - val_loss: 2.4420 - val_MSE_Epoc

2022-11-29 03:24:58.544178: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:25:08.096836: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 12s - loss: 26.8231 - MSE_Epochs: 26.8231 - val_loss: 9.3245 - val_MSE_Epochs: 9.3245 - 12s/epoch - 10ms/step
Epoch 2/20
1207/1207 - 11s - loss: 5.7053 - MSE_Epochs: 5.7053 - val_loss: 5.2396 - val_MSE_Epochs: 5.2396 - 11s/epoch - 9ms/step
Epoch 3/20
1207/1207 - 11s - loss: 4.4525 - MSE_Epochs: 4.4525 - val_loss: 3.6376 - val_MSE_Epochs: 3.6376 - 11s/epoch - 9ms/step
Epoch 4/20
1207/1207 - 11s - loss: 3.7745 - MSE_Epochs: 3.7745 - val_loss: 2.9051 - val_MSE_Epochs: 2.9051 - 11s/epoch - 9ms/step
Epoch 5/20
1207/1207 - 11s - loss: 3.2750 - MSE_Epochs: 3.2750 - val_loss: 2.8032 - val_MSE_Epochs: 2.8032 - 11s/epoch - 9ms/step
Epoch 6/20
1207/1207 - 11s - loss: 3.0082 - MSE_Epochs: 3.0082 - val_loss: 2.6262 - val_MSE_Epochs: 2.6262 - 11s/epoch - 9ms/step
Epoch 7/20
1207/1207 - 11s - loss: 2.8162 - MSE_Epochs: 2.8162 - val_loss: 2.3854 - val_MSE_Epochs: 2.3854 - 11s/epoch - 9ms/step
Epoch 8/20
1207/1207 - 11s - loss: 2.6564 - MSE_Epochs: 2.6564 - val_loss: 2.2305 - val_MSE_Epochs

2022-11-29 03:28:45.745410: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:28:53.757696: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 11s - loss: 41.0211 - MSE_Epochs: 41.0211 - val_loss: 7.1514 - val_MSE_Epochs: 7.1514 - 11s/epoch - 9ms/step
Epoch 2/20
1207/1207 - 10s - loss: 5.9131 - MSE_Epochs: 5.9131 - val_loss: 4.5085 - val_MSE_Epochs: 4.5085 - 10s/epoch - 8ms/step
Epoch 3/20
1207/1207 - 10s - loss: 4.4339 - MSE_Epochs: 4.4339 - val_loss: 5.5603 - val_MSE_Epochs: 5.5603 - 10s/epoch - 8ms/step
Epoch 4/20
1207/1207 - 10s - loss: 3.9147 - MSE_Epochs: 3.9147 - val_loss: 4.2545 - val_MSE_Epochs: 4.2545 - 10s/epoch - 8ms/step
Epoch 5/20
1207/1207 - 10s - loss: 3.5505 - MSE_Epochs: 3.5505 - val_loss: 3.3325 - val_MSE_Epochs: 3.3325 - 10s/epoch - 8ms/step
Epoch 6/20
1207/1207 - 10s - loss: 3.0725 - MSE_Epochs: 3.0725 - val_loss: 2.9486 - val_MSE_Epochs: 2.9486 - 10s/epoch - 8ms/step
Epoch 7/20
1207/1207 - 10s - loss: 2.9465 - MSE_Epochs: 2.9465 - val_loss: 3.5638 - val_MSE_Epochs: 3.5638 - 10s/epoch - 8ms/step
Epoch 8/20
1207/1207 - 10s - loss: 2.7798 - MSE_Epochs: 2.7798 - val_loss: 3.3505 - val_MSE_Epochs:

2022-11-29 03:31:59.881970: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:32:08.279682: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 11s - loss: 40.7958 - MSE_Epochs: 40.7958 - val_loss: 8.8097 - val_MSE_Epochs: 8.8097 - 11s/epoch - 9ms/step
Epoch 2/20
1207/1207 - 10s - loss: 5.4438 - MSE_Epochs: 5.4438 - val_loss: 4.3952 - val_MSE_Epochs: 4.3952 - 10s/epoch - 8ms/step
Epoch 3/20
1207/1207 - 10s - loss: 4.3827 - MSE_Epochs: 4.3827 - val_loss: 3.5730 - val_MSE_Epochs: 3.5730 - 10s/epoch - 8ms/step
Epoch 4/20
1207/1207 - 10s - loss: 3.8283 - MSE_Epochs: 3.8283 - val_loss: 3.0808 - val_MSE_Epochs: 3.0808 - 10s/epoch - 8ms/step
Epoch 5/20
1207/1207 - 10s - loss: 3.3872 - MSE_Epochs: 3.3872 - val_loss: 3.2831 - val_MSE_Epochs: 3.2831 - 10s/epoch - 8ms/step
Epoch 6/20
1207/1207 - 10s - loss: 3.0488 - MSE_Epochs: 3.0488 - val_loss: 2.8659 - val_MSE_Epochs: 2.8659 - 10s/epoch - 8ms/step
Epoch 7/20
1207/1207 - 10s - loss: 2.9559 - MSE_Epochs: 2.9559 - val_loss: 3.2336 - val_MSE_Epochs: 3.2336 - 10s/epoch - 8ms/step
Epoch 8/20
1207/1207 - 10s - loss: 2.7559 - MSE_Epochs: 2.7559 - val_loss: 3.1856 - val_MSE_Epochs:

2022-11-29 03:35:12.203961: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:35:20.126501: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 10s - loss: 43.3421 - MSE_Epochs: 43.3421 - val_loss: 7.7313 - val_MSE_Epochs: 7.7313 - 10s/epoch - 9ms/step
Epoch 2/20
1207/1207 - 10s - loss: 6.1333 - MSE_Epochs: 6.1333 - val_loss: 4.4758 - val_MSE_Epochs: 4.4758 - 10s/epoch - 8ms/step
Epoch 3/20
1207/1207 - 10s - loss: 4.6316 - MSE_Epochs: 4.6316 - val_loss: 4.2248 - val_MSE_Epochs: 4.2248 - 10s/epoch - 8ms/step
Epoch 4/20
1207/1207 - 10s - loss: 4.0579 - MSE_Epochs: 4.0579 - val_loss: 3.1140 - val_MSE_Epochs: 3.1140 - 10s/epoch - 8ms/step
Epoch 5/20
1207/1207 - 10s - loss: 3.6537 - MSE_Epochs: 3.6537 - val_loss: 3.9001 - val_MSE_Epochs: 3.9001 - 10s/epoch - 8ms/step
Epoch 6/20
1207/1207 - 10s - loss: 3.2174 - MSE_Epochs: 3.2174 - val_loss: 3.4514 - val_MSE_Epochs: 3.4514 - 10s/epoch - 8ms/step
Epoch 7/20
1207/1207 - 10s - loss: 2.9991 - MSE_Epochs: 2.9991 - val_loss: 4.3883 - val_MSE_Epochs: 4.3883 - 10s/epoch - 8ms/step
Epoch 8/20
1207/1207 - 10s - loss: 2.8032 - MSE_Epochs: 2.8032 - val_loss: 2.1995 - val_MSE_Epochs:

2022-11-29 03:38:26.005980: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:38:35.640448: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 12s - loss: 37.2664 - MSE_Epochs: 37.2664 - val_loss: 6.8015 - val_MSE_Epochs: 6.8015 - 12s/epoch - 10ms/step
Epoch 2/20
1207/1207 - 11s - loss: 5.2241 - MSE_Epochs: 5.2241 - val_loss: 4.3358 - val_MSE_Epochs: 4.3358 - 11s/epoch - 9ms/step
Epoch 3/20
1207/1207 - 11s - loss: 4.1986 - MSE_Epochs: 4.1986 - val_loss: 3.5553 - val_MSE_Epochs: 3.5553 - 11s/epoch - 9ms/step
Epoch 4/20
1207/1207 - 12s - loss: 3.6636 - MSE_Epochs: 3.6636 - val_loss: 3.8857 - val_MSE_Epochs: 3.8857 - 12s/epoch - 10ms/step
Epoch 5/20
1207/1207 - 12s - loss: 3.2831 - MSE_Epochs: 3.2831 - val_loss: 3.4167 - val_MSE_Epochs: 3.4167 - 12s/epoch - 10ms/step
Epoch 6/20
1207/1207 - 13s - loss: 3.1458 - MSE_Epochs: 3.1458 - val_loss: 2.8109 - val_MSE_Epochs: 2.8109 - 13s/epoch - 10ms/step
Epoch 7/20
1207/1207 - 12s - loss: 2.8338 - MSE_Epochs: 2.8338 - val_loss: 2.3604 - val_MSE_Epochs: 2.3604 - 12s/epoch - 10ms/step
Epoch 8/20
1207/1207 - 12s - loss: 2.6632 - MSE_Epochs: 2.6632 - val_loss: 3.3652 - val_MSE_Ep

2022-11-29 03:42:05.446929: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:42:12.743783: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 9s - loss: 37.9360 - MSE_Epochs: 37.9360 - val_loss: 6.0936 - val_MSE_Epochs: 6.0936 - 9s/epoch - 8ms/step
Epoch 2/20
1207/1207 - 9s - loss: 5.5298 - MSE_Epochs: 5.5298 - val_loss: 5.6272 - val_MSE_Epochs: 5.6272 - 9s/epoch - 8ms/step
Epoch 3/20
1207/1207 - 9s - loss: 4.3751 - MSE_Epochs: 4.3751 - val_loss: 5.3448 - val_MSE_Epochs: 5.3448 - 9s/epoch - 7ms/step
Epoch 4/20
1207/1207 - 9s - loss: 3.7579 - MSE_Epochs: 3.7579 - val_loss: 3.1227 - val_MSE_Epochs: 3.1227 - 9s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 9s - loss: 3.4509 - MSE_Epochs: 3.4509 - val_loss: 4.1559 - val_MSE_Epochs: 4.1559 - 9s/epoch - 7ms/step
Epoch 6/20
1207/1207 - 9s - loss: 3.0661 - MSE_Epochs: 3.0661 - val_loss: 2.4682 - val_MSE_Epochs: 2.4682 - 9s/epoch - 7ms/step
Epoch 7/20
1207/1207 - 9s - loss: 2.9373 - MSE_Epochs: 2.9373 - val_loss: 2.4675 - val_MSE_Epochs: 2.4675 - 9s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 9s - loss: 2.7200 - MSE_Epochs: 2.7200 - val_loss: 2.5336 - val_MSE_Epochs: 2.5336 - 9s/ep

2022-11-29 03:45:01.750031: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:45:08.908946: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 9s - loss: 38.8752 - MSE_Epochs: 38.8752 - val_loss: 5.6978 - val_MSE_Epochs: 5.6978 - 9s/epoch - 8ms/step
Epoch 2/20
1207/1207 - 9s - loss: 5.4170 - MSE_Epochs: 5.4170 - val_loss: 3.9593 - val_MSE_Epochs: 3.9593 - 9s/epoch - 7ms/step
Epoch 3/20
1207/1207 - 9s - loss: 4.4030 - MSE_Epochs: 4.4030 - val_loss: 3.3747 - val_MSE_Epochs: 3.3747 - 9s/epoch - 7ms/step
Epoch 4/20
1207/1207 - 9s - loss: 3.8705 - MSE_Epochs: 3.8705 - val_loss: 3.4864 - val_MSE_Epochs: 3.4864 - 9s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 9s - loss: 3.4865 - MSE_Epochs: 3.4865 - val_loss: 3.1824 - val_MSE_Epochs: 3.1824 - 9s/epoch - 7ms/step
Epoch 6/20
1207/1207 - 9s - loss: 3.0447 - MSE_Epochs: 3.0447 - val_loss: 3.9446 - val_MSE_Epochs: 3.9446 - 9s/epoch - 7ms/step
Epoch 7/20
1207/1207 - 9s - loss: 2.8847 - MSE_Epochs: 2.8847 - val_loss: 5.8410 - val_MSE_Epochs: 5.8410 - 9s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 9s - loss: 2.6883 - MSE_Epochs: 2.6883 - val_loss: 2.2468 - val_MSE_Epochs: 2.2468 - 9s/ep

2022-11-29 03:47:57.323366: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:48:03.713036: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 8s - loss: 29.9003 - MSE_Epochs: 29.9003 - val_loss: 5.7720 - val_MSE_Epochs: 5.7720 - 8s/epoch - 7ms/step
Epoch 2/20
1207/1207 - 8s - loss: 5.2722 - MSE_Epochs: 5.2722 - val_loss: 5.2955 - val_MSE_Epochs: 5.2955 - 8s/epoch - 7ms/step
Epoch 3/20
1207/1207 - 8s - loss: 4.2081 - MSE_Epochs: 4.2081 - val_loss: 4.1632 - val_MSE_Epochs: 4.1632 - 8s/epoch - 7ms/step
Epoch 4/20
1207/1207 - 8s - loss: 3.8105 - MSE_Epochs: 3.8105 - val_loss: 3.4159 - val_MSE_Epochs: 3.4159 - 8s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 8s - loss: 3.4985 - MSE_Epochs: 3.4985 - val_loss: 3.5387 - val_MSE_Epochs: 3.5387 - 8s/epoch - 7ms/step
Epoch 6/20
1207/1207 - 8s - loss: 3.0965 - MSE_Epochs: 3.0965 - val_loss: 3.1282 - val_MSE_Epochs: 3.1282 - 8s/epoch - 6ms/step
Epoch 7/20
1207/1207 - 8s - loss: 3.1422 - MSE_Epochs: 3.1422 - val_loss: 3.0859 - val_MSE_Epochs: 3.0859 - 8s/epoch - 6ms/step
Epoch 8/20
1207/1207 - 8s - loss: 2.7834 - MSE_Epochs: 2.7834 - val_loss: 2.8449 - val_MSE_Epochs: 2.8449 - 8s/ep

2022-11-29 03:50:33.119864: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:50:39.608900: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 8s - loss: 29.5247 - MSE_Epochs: 29.5247 - val_loss: 5.3448 - val_MSE_Epochs: 5.3448 - 8s/epoch - 7ms/step
Epoch 2/20
1207/1207 - 8s - loss: 4.9106 - MSE_Epochs: 4.9106 - val_loss: 4.3311 - val_MSE_Epochs: 4.3311 - 8s/epoch - 7ms/step
Epoch 3/20
1207/1207 - 8s - loss: 4.1146 - MSE_Epochs: 4.1146 - val_loss: 3.3161 - val_MSE_Epochs: 3.3161 - 8s/epoch - 7ms/step
Epoch 4/20
1207/1207 - 8s - loss: 3.4613 - MSE_Epochs: 3.4613 - val_loss: 6.3960 - val_MSE_Epochs: 6.3960 - 8s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 8s - loss: 3.3231 - MSE_Epochs: 3.3231 - val_loss: 2.5933 - val_MSE_Epochs: 2.5933 - 8s/epoch - 6ms/step
Epoch 6/20
1207/1207 - 8s - loss: 2.9912 - MSE_Epochs: 2.9912 - val_loss: 3.0825 - val_MSE_Epochs: 3.0825 - 8s/epoch - 6ms/step
Epoch 7/20
1207/1207 - 8s - loss: 2.8280 - MSE_Epochs: 2.8280 - val_loss: 2.2777 - val_MSE_Epochs: 2.2777 - 8s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 8s - loss: 2.6839 - MSE_Epochs: 2.6839 - val_loss: 4.0779 - val_MSE_Epochs: 4.0779 - 8s/ep

2022-11-29 03:53:09.716814: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:53:16.157444: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 9s - loss: 27.9869 - MSE_Epochs: 27.9869 - val_loss: 6.2148 - val_MSE_Epochs: 6.2148 - 9s/epoch - 7ms/step
Epoch 2/20
1207/1207 - 8s - loss: 4.7873 - MSE_Epochs: 4.7873 - val_loss: 3.6151 - val_MSE_Epochs: 3.6151 - 8s/epoch - 7ms/step
Epoch 3/20
1207/1207 - 8s - loss: 4.0526 - MSE_Epochs: 4.0526 - val_loss: 3.1102 - val_MSE_Epochs: 3.1102 - 8s/epoch - 6ms/step
Epoch 4/20
1207/1207 - 8s - loss: 3.3742 - MSE_Epochs: 3.3742 - val_loss: 2.7341 - val_MSE_Epochs: 2.7341 - 8s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 8s - loss: 3.3199 - MSE_Epochs: 3.3199 - val_loss: 2.5288 - val_MSE_Epochs: 2.5288 - 8s/epoch - 6ms/step
Epoch 6/20
1207/1207 - 8s - loss: 2.9920 - MSE_Epochs: 2.9920 - val_loss: 3.6143 - val_MSE_Epochs: 3.6143 - 8s/epoch - 7ms/step
Epoch 7/20
1207/1207 - 8s - loss: 2.7622 - MSE_Epochs: 2.7622 - val_loss: 2.1940 - val_MSE_Epochs: 2.1940 - 8s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 8s - loss: 2.6758 - MSE_Epochs: 2.6758 - val_loss: 2.3658 - val_MSE_Epochs: 2.3658 - 8s/ep

2022-11-29 03:55:46.968881: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:55:54.141276: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 9s - loss: 51.1017 - MSE_Epochs: 51.1017 - val_loss: 5.3216 - val_MSE_Epochs: 5.3216 - 9s/epoch - 8ms/step
Epoch 2/20
1207/1207 - 9s - loss: 5.2345 - MSE_Epochs: 5.2345 - val_loss: 4.0642 - val_MSE_Epochs: 4.0642 - 9s/epoch - 8ms/step
Epoch 3/20
1207/1207 - 9s - loss: 4.3608 - MSE_Epochs: 4.3608 - val_loss: 8.0813 - val_MSE_Epochs: 8.0813 - 9s/epoch - 8ms/step
Epoch 4/20
1207/1207 - 8s - loss: 3.6439 - MSE_Epochs: 3.6439 - val_loss: 3.1118 - val_MSE_Epochs: 3.1118 - 8s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 8s - loss: 3.4511 - MSE_Epochs: 3.4511 - val_loss: 2.9420 - val_MSE_Epochs: 2.9420 - 8s/epoch - 7ms/step
Epoch 6/20
1207/1207 - 8s - loss: 3.0741 - MSE_Epochs: 3.0741 - val_loss: 2.7417 - val_MSE_Epochs: 2.7417 - 8s/epoch - 7ms/step
Epoch 7/20
1207/1207 - 8s - loss: 2.9360 - MSE_Epochs: 2.9360 - val_loss: 2.3735 - val_MSE_Epochs: 2.3735 - 8s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 8s - loss: 2.8418 - MSE_Epochs: 2.8418 - val_loss: 2.8635 - val_MSE_Epochs: 2.8635 - 8s/ep

2022-11-29 03:58:31.695945: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 03:58:38.333542: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 9s - loss: 52.0145 - MSE_Epochs: 52.0145 - val_loss: 5.2110 - val_MSE_Epochs: 5.2110 - 9s/epoch - 7ms/step
Epoch 2/20
1207/1207 - 8s - loss: 5.1188 - MSE_Epochs: 5.1188 - val_loss: 4.1879 - val_MSE_Epochs: 4.1879 - 8s/epoch - 7ms/step
Epoch 3/20
1207/1207 - 8s - loss: 4.4302 - MSE_Epochs: 4.4302 - val_loss: 3.3693 - val_MSE_Epochs: 3.3693 - 8s/epoch - 7ms/step
Epoch 4/20
1207/1207 - 8s - loss: 3.6977 - MSE_Epochs: 3.6977 - val_loss: 3.3546 - val_MSE_Epochs: 3.3546 - 8s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 8s - loss: 3.3764 - MSE_Epochs: 3.3764 - val_loss: 2.9161 - val_MSE_Epochs: 2.9161 - 8s/epoch - 7ms/step
Epoch 6/20
1207/1207 - 8s - loss: 3.1428 - MSE_Epochs: 3.1428 - val_loss: 2.4946 - val_MSE_Epochs: 2.4946 - 8s/epoch - 7ms/step
Epoch 7/20
1207/1207 - 8s - loss: 2.9440 - MSE_Epochs: 2.9440 - val_loss: 2.6206 - val_MSE_Epochs: 2.6206 - 8s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 8s - loss: 2.7357 - MSE_Epochs: 2.7357 - val_loss: 2.2378 - val_MSE_Epochs: 2.2378 - 8s/ep

2022-11-29 04:01:12.099224: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-11-29 04:01:18.706622: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


1207/1207 - 9s - loss: 52.6451 - MSE_Epochs: 52.6451 - val_loss: 6.0981 - val_MSE_Epochs: 6.0981 - 9s/epoch - 7ms/step
Epoch 2/20
1207/1207 - 8s - loss: 5.4765 - MSE_Epochs: 5.4765 - val_loss: 6.8068 - val_MSE_Epochs: 6.8068 - 8s/epoch - 7ms/step
Epoch 3/20
1207/1207 - 8s - loss: 4.2998 - MSE_Epochs: 4.2998 - val_loss: 3.2838 - val_MSE_Epochs: 3.2838 - 8s/epoch - 7ms/step
Epoch 4/20
1207/1207 - 8s - loss: 3.8066 - MSE_Epochs: 3.8066 - val_loss: 4.1883 - val_MSE_Epochs: 4.1883 - 8s/epoch - 7ms/step
Epoch 5/20
1207/1207 - 8s - loss: 3.4593 - MSE_Epochs: 3.4593 - val_loss: 5.6484 - val_MSE_Epochs: 5.6484 - 8s/epoch - 7ms/step
Epoch 6/20
1207/1207 - 8s - loss: 3.1908 - MSE_Epochs: 3.1908 - val_loss: 2.3829 - val_MSE_Epochs: 2.3829 - 8s/epoch - 7ms/step
Epoch 7/20
1207/1207 - 8s - loss: 2.9282 - MSE_Epochs: 2.9282 - val_loss: 2.3309 - val_MSE_Epochs: 2.3309 - 8s/epoch - 7ms/step
Epoch 8/20
1207/1207 - 8s - loss: 2.7863 - MSE_Epochs: 2.7863 - val_loss: 2.1871 - val_MSE_Epochs: 2.1871 - 8s/ep