In [112]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf
from pyspark.sql.types import StringType
from ast import literal_eval

import json

In [48]:
spark = SparkSession.builder.appName("Test").getOrCreate()

In [49]:
spark

In [114]:
## reading the dataset
df = spark.read.csv('../../files/075435482.csv',header=True,inferSchema=True)

In [86]:
df.printSchema()

root
 |-- match_url: string (nullable = true)
 |-- bookie: string (nullable = true)
 |-- odds: string (nullable = true)



In [87]:
df.show()

+--------------------+------------+--------------------+
|           match_url|      bookie|                odds|
+--------------------+------------+--------------------+
|https://www.oddsp...|    10x10bet|['31/25', '5/2', ...|
|https://www.oddsp...|       1xBet|['129/100', '127/...|
|https://www.oddsp...| bet-at-home|['59/50', '12/5',...|
|https://www.oddsp...|      bet365|['23/20', '5/2', ...|
|https://www.oddsp...|    BetFinal|['63/50', '5/2', ...|
|https://www.oddsp...|        bwin|['6/5', '5/2', '1...|
|https://www.oddsp...|     Coolbet|['61/50', '63/25'...|
|https://www.oddsp...|     Curebet|['31/25', '5/2', ...|
|https://www.oddsp...|     Ditobet|['5/4', '253/100'...|
|https://www.oddsp...|       GGBET|['32/25', '131/50...|
|https://www.oddsp...|      Lasbet|['5/4', '253/100'...|
|https://www.oddsp...| Marathonbet|['127/100', '63/2...|
|https://www.oddsp...|      N1 Bet|['123/100', '5/2'...|
|https://www.oddsp...|   Parimatch|['123/100', '49/2...|
|https://www.oddsp...|    Pinna

In [131]:
def ConvertProbabilityToDecimal(probability_list):
    decimal_list = []
    probability_list = literal_eval(probability_list)
    for i in probability_list:
        num,den = i.split("/")
        decimal_odd = 1 + float(num) / float(den)
        
        decimal_list.append(decimal_odd)
        
    return decimal_list

In [132]:
convertOddsUDF = udf(lambda z: ConvertProbabilityToDecimal(z), StringType())

In [133]:
df.withColumn("decimal_odds", convertOddsUDF(df.odds))

DataFrame[match_url: string, bookie: string, odds: string, decimal_odds: string]

In [134]:
df.show()

+--------------------+------------+--------------------+--------------------+
|           match_url|      bookie|                odds|        decimal_odds|
+--------------------+------------+--------------------+--------------------+
|https://www.oddsp...|    10x10bet|['31/25', '5/2', ...|   [2.24, 3.5, 3.37]|
|https://www.oddsp...|       1xBet|['129/100', '127/...|  [2.29, 3.54, 3.36]|
|https://www.oddsp...| bet-at-home|['59/50', '12/5',...|[2.17999999999999...|
|https://www.oddsp...|      bet365|['23/20', '5/2', ...|   [2.15, 3.5, 3.25]|
|https://www.oddsp...|    BetFinal|['63/50', '5/2', ...|    [2.26, 3.5, 3.4]|
|https://www.oddsp...|        bwin|['6/5', '5/2', '1...|     [2.2, 3.5, 3.2]|
|https://www.oddsp...|     Coolbet|['61/50', '63/25'...|[2.21999999999999...|
|https://www.oddsp...|     Curebet|['31/25', '5/2', ...|   [2.24, 3.5, 3.37]|
|https://www.oddsp...|     Ditobet|['5/4', '253/100'...|  [2.25, 3.53, 3.41]|
|https://www.oddsp...|       GGBET|['32/25', '131/50...|[2.28000