In [1]:
from spmf import Spmf
import pandas as pd
import altair as alt
from vega_datasets import data

In [2]:
# Lookup from numeric event_type code to its label; a label's position in
# the list below is its code.
event_type_dic = dict(enumerate([
    'Announcement',
    'Attempt',
    'Corner',
    'Foul',
    'Yellow card',
    'Second yellow card',
    'Red card',
    'Substitution',
    'Free kick won',
    'Offside',
    'Hand ball',
    'Penalty conceded',
]))

In [3]:
# Load the event table from the working directory; the later cells derive
# their frames from `df`.
df = pd.read_csv('events.csv')

# Are there any patterns across the whole game?

In [4]:
# Build the key column on a new frame so the loaded `df` is left untouched
# and this cell can be re-run safely (the original aliased `df` and wrote the
# column into it). The key is "<id_odsp>_<event_type>", produced with
# vectorised string concatenation instead of a row-wise apply.
cols = ['id_odsp', 'event_type']
data1 = df.assign(
    match_event_type=df[cols[0]].astype(str) + '_' + df[cols[1]].astype(str)
)

# One frame per (event_team, opponent) pair; groupby keeps the rows of each
# group in their original order.
# NOTE(review): if 'id_odsp' is a match id, this pools every fixture between
# the same two teams into one group — add it to the keys if one sequence per
# match is intended. Confirm.
grouped_data = data1.groupby(['event_team','opponent'])
dataframes = [group for _, group in grouped_data]

In [5]:
# One entry per grouped frame: that frame's event_type codes, in row order,
# wrapped in a single-element outer list.
# NOTE(review): if spmf-py reads each inner list as one itemset, the whole
# group becomes a single itemset and event order is not mined — confirm.
allMatchEventsAsSequence = [
    [frame["event_type"].tolist()] for frame in dataframes
]

In [6]:
# Run PrefixSpan over every sequence; the two values in `arguments` are
# handed to SPMF as given.
spmf_all_game = Spmf(
    "PrefixSpan",
    input_direct=allMatchEventsAsSequence,
    output_filename="output.txt",
    arguments=[0.9, 6],
)
spmf_all_game.run()

# Read the mined patterns back as a DataFrame, then replace each pattern by
# the whitespace-separated tokens of its first element.
allGame = spmf_all_game.to_pandas_dataframe(pickle=True)
allGame['pattern'] = allGame['pattern'].map(
    lambda itemsets: itemsets[0].split()
)

>/Users/ehsanjso/Desktop/Waterloo/Second term/CS889/VD3/spmf.jar
 Total time ~ 207819 ms
 Frequent sequences count : 68522
 Max memory (mb) : 276.6285858154297
 minsup = 3156 sequences.
 Pattern count : 68522




In [173]:
# Spread each pattern (a list of event codes) over positional columns and name
# them 'code_<n>', which is what the all-game chart folds over. With the
# prefix commented out the columns were the integers 0, 1, 2, ...: they did
# not match the chart's fold list, and Altair rejects non-string column names.
# Short patterns keep NaN in the unused positions rather than 0, because 0 is
# itself a valid event code.
allGamePatterns = (
    pd.DataFrame(allGame['pattern'].tolist())
    .add_prefix('code_')
    .assign(sup=allGame['sup'])  # added after the prefix so it stays 'sup'
)

# First 5 minutes of the game: what is exciting?

In [12]:
# Keep only the events whose 'time' value is at most 5.
start_game = df.loc[df['time'] <= 5]

In [13]:
# Split the early-game events by (event_team, opponent) and keep each
# group's frame, dropping the group key.
grouped_data = start_game.groupby(['event_team', 'opponent'])
start_game_dataframes = [frame for _key, frame in grouped_data]

In [15]:
# One entry per early-game frame: its event_type codes in row order, wrapped
# in a single-element outer list.
startGameEventsAsSequence = [
    [frame["event_type"].tolist()] for frame in start_game_dataframes
]

In [None]:
# Show the first entry to check the nesting that is handed to SPMF.
startGameEventsAsSequence[0]

In [None]:
# Mine the early-game sequences with SPAM.
# NOTE(review): only the first 10 sequences are passed in ([:10]); this looks
# like a debugging subset — confirm before relying on the support values.
spmf = Spmf("SPAM", input_direct=startGameEventsAsSequence[:10],
            output_filename="output_start_game.txt",
            arguments=[.5, 10])

spmf.run()

# Convert the output once and reuse the frame. The original called
# to_pandas_dataframe(pickle=True) twice, converting and pickling the same
# results a second time.
startGamePatterns = spmf.to_pandas_dataframe(pickle=True)
print(startGamePatterns)  # printed before the pattern column is rewritten

# Replace each pattern by the whitespace-separated tokens of its first element.
startGamePatterns['pattern'] = startGamePatterns['pattern'].map(lambda a: a[0].split())

In [111]:
# Wide table of the early-game patterns: one 'code_<n>' column per pattern
# position (unused positions filled with 0) plus the pattern's 'sup' value.
startGamePatternsDict = (
    pd.DataFrame(startGamePatterns['pattern'].tolist())
    .fillna(0)
    .add_prefix('code_')
    .assign(sup=startGamePatterns['sup'])
)

In [None]:
# Display the wide pattern table built in the previous cell.
startGamePatternsDict

# What happens after a red card?

In [135]:
# Keep only the events that come after a red card has been shown in the same
# match. The original cell grouped `start_game` again, so the "red card"
# sequences were just a copy of the first-five-minutes sequences.
# Relies on rows being in chronological order within each match, as the
# sequence-building cells already do.
# NOTE(review): assumes 'id_odsp' identifies a match and that code 6
# ('Red card') is the only dismissal of interest; code 5 is
# 'Second yellow card' — confirm.
RED_CARD_EVENT = 6
red_card_flag = df['event_type'].eq(RED_CARD_EVENT).astype(int)
# Red cards seen in the match strictly before each row.
red_cards_before_row = (
    red_card_flag.groupby(df['id_odsp']).cumsum() - red_card_flag
)
after_red_card = df[red_cards_before_row > 0]

grouped_data = after_red_card.groupby(['event_team','opponent'])
red_card_dataframes = [group for _, group in grouped_data]

In [137]:
# One entry per frame in red_card_dataframes: its event_type codes in row
# order, wrapped in a single-element outer list.
redCardEventsAsSequence = [
    [frame["event_type"].tolist()] for frame in red_card_dataframes
]

# Visualizations

### First 5 min sequence

In [182]:
# Parallel-coordinates style view of the early-game patterns: every table row
# gets its own line (via the window-generated index) across the first four
# pattern positions, coloured by support.
(
    alt.Chart(startGamePatternsDict)
    .mark_line()
    .transform_window(index='count()')
    .transform_fold(['code_0', 'code_1', 'code_2', 'code_3'])
    .encode(
        x=alt.X('key:N'),
        y=alt.Y('value:Q'),
        color=alt.Color('sup:N'),
        detail=alt.Detail('index:N'),
        opacity=alt.value(0.5),
    )
    .properties(width=700, height=500)
)

In [183]:
# Scatter plot of the iris sample dataset (not the football events): sepal
# length against sepal width, coloured by species and sized by petal width.
(
    alt.Chart(data.iris())
    .mark_circle()
    .encode(
        x=alt.X('sepalLength', scale=alt.Scale(zero=False)),
        y=alt.Y('sepalWidth', scale=alt.Scale(zero=False, padding=1)),
        color='species',
        size='petalWidth',
    )
)

In [176]:
# Sample cars dataset from vega_datasets (not the football events).
source = data.cars()

# Interval (drag-to-select) brush shared by the charts below.
brush = alt.selection(type='interval')

# Scatter plot: points inside the brush are coloured by Cylinders, the rest
# are drawn grey.
points = alt.Chart(source).mark_point().encode(
    x='Horsepower:Q',
    y='Miles_per_Gallon:Q',
    color=alt.condition(brush, 'Cylinders:O', alt.value('grey'))
).add_selection(brush)

# Base chart for a text table over startGamePatternsDict: number the rows,
# keep those matching the brush, re-rank what is left and keep ranks below 20.
# NOTE(review): `ranked_text` is never placed in the layout below, and the
# brush is defined on the cars scatter while this chart reads the pattern
# table — looks unfinished; confirm intent.
ranked_text = alt.Chart(startGamePatternsDict).mark_text().encode(
    y=alt.Y('row_number:O',axis=None)
).transform_window(
    row_number='row_number()'
).transform_filter(
    brush
).transform_window(
    rank='rank(row_number)'
).transform_filter(
    alt.datum.rank<20
)

# Final layout: only the scatter plot is concatenated; the colour legend is
# resolved independently.
alt.hconcat(
    points,
).resolve_legend(
    color="independent"
)

### All Game Vis

In [133]:
# Lift Altair's row limit so the full all-game pattern table can be charted.
alt.data_transformers.disable_max_rows()

# Parallel-coordinates style view of the all-game patterns: every table row
# gets its own line (via the window-generated index) across six pattern
# positions, coloured by support.
(
    alt.Chart(allGamePatterns)
    .mark_line()
    .transform_window(index='count()')
    .transform_fold(
        ['code_0', 'code_1', 'code_2', 'code_3', 'code_4', 'code_5']
    )
    .encode(
        x=alt.X('key:N'),
        y=alt.Y('value:Q'),
        color=alt.Color('sup:N'),
        detail=alt.Detail('index:N'),
        opacity=alt.value(0.5),
    )
    .properties(width=500)
)

In [174]:
# URL of the Seattle temperatures sample dataset (not the football events).
source = data.seattle_temps.url

# Calendar heatmap: day of month on x, month on y, cell colour from the
# maximum temperature, with a tooltip showing the date and that maximum.
(
    alt.Chart(source)
    .mark_rect()
    .encode(
        x=alt.X('date(date):O'),
        y=alt.Y('month(date):O'),
        color=alt.Color('max(temp):Q', scale=alt.Scale(scheme="inferno")),
        tooltip=[
            alt.Tooltip('monthdate(date):T', title='Date'),
            alt.Tooltip('max(temp):Q', title='Max Temp'),
        ],
    )
    .properties(
        title="2010 Daily High Temperature (F) in Seattle, WA",
        width=550,
    )
)

In [175]:
# Show the dataset URL currently bound to `source`.
source

'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/seattle-temps.csv'