In [1]:
import os
from itertools import product
from time import time

from numpy import array, ndarray

import code.profiles.generation as gen
from code.profiles.definitions import Profile, Voter, Candidate, notVCR33
from code.profiles.vcrDetection import detectVCRProperty, detectCRProperty, detectVRProperty
from code.profiles.vcrDomain import isVCR
from code.utils import getNumpyColumns

In [2]:
from pyspark.sql import SparkSession, Row
from pyspark.sql.types import StructType, StructField, FloatType

# Obtain the SparkSession through the builder's getOrCreate(); the resulting
# `spark` handle is used by later cells to build and write DataFrames.
spark = SparkSession.builder.getOrCreate()
# SparkContext behind the session; later cells use it for RDD creation.
sc = spark.sparkContext  # alternative: sc = SparkContext.getOrCreate()
# Bare expression so the notebook shows the session as this cell's output.
spark

In [7]:
def run(savePath: str, C: int=3, V:int =3):
    """Enumerate profiles for C candidates and V voters, filter them, and save the result.

    Pipeline:
      1. Generate profiles with ``gen.parallelProfileGeneration`` and distribute
         them as an RDD.
      2. Keep profiles for which ``detectVCRProperty`` returns a result whose
         first element equals 2 (counted as ``statistics["VCR"]``).
      3. Of those, keep profiles for which neither ``detectCRProperty`` nor
         ``detectVRProperty`` is truthy, and turn each into a ``Profile`` via
         ``Profile.fromILPRes`` (counted as ``statistics["NCOPVCR"]``).
      4. Write the surviving profiles (one float row per profile) as CSV to
         ``savePath`` and the two counters to
         ``resources/<last component of savePath>-stats.csv``.

    Args:
        savePath: Output location for the profile CSV. Its last path component
            is reused as the name of the statistics file.
        C: Number of candidates; must be between 0 and 7 (only seven ids exist).
        V: Number of voters; must be between 0 and 7 (only seven ids exist).

    Returns:
        A tuple ``(statistics, vcrNCOPNumpyRows)`` where ``statistics`` is a dict
        with keys ``"VCR"`` and ``"NCOPVCR"`` and ``vcrNCOPNumpyRows`` is the
        (cached) RDD of ``Row`` objects that was written to ``savePath``.

    Raises:
        ValueError: If ``C`` or ``V`` is outside the supported 0..7 range.
        NOTE(review): the writers use Spark's default save mode, so an already
        existing output path is expected to make the write fail - confirm.
    """
    candidatePool = ["A", "B", "C", "D", "E", "F", "G"]
    voterPool = ["0", "1", "2", "3", "4", "5", "6"]

    # Slicing past the pool (or with a negative size) would silently produce an
    # id list whose length disagrees with the generated profiles.
    if not 0 <= C <= len(candidatePool):
        raise ValueError("C must be between 0 and {}, got {}".format(len(candidatePool), C))
    if not 0 <= V <= len(voterPool):
        raise ValueError("V must be between 0 and {}, got {}".format(len(voterPool), V))

    candidatesIds = candidatePool[:C]
    votersIds = voterPool[:V]
    statistics = {}

    profilesRDD = sc.parallelize(gen.parallelProfileGeneration(cpu=16, candidatesNumber=C, voterNumber=V))

    print("ONE")

    # Each element becomes (profile, detectionResult); only results whose first
    # entry is 2 are kept.
    # NOTE(review): this RDD is re-evaluated by every downstream action; caching
    # it would avoid re-running the detection but requires the detection result
    # to be picklable - confirm before adding .cache() here.
    vcrProfilesRDD = (
        profilesRDD
        .map(lambda p: (p, detectVCRProperty(A=p, C=candidatesIds, V=votersIds)))
        .filter(lambda pRes: pRes[1][0] == 2)
    )

    statistics["VCR"] = vcrProfilesRDD.count()
    print(statistics["VCR"])

    vcrNCOPProfilesRDD = (
        vcrProfilesRDD
        .filter(lambda pRes: not detectCRProperty(A=pRes[0], C=candidatesIds, V=votersIds))
        .filter(lambda pRes: not detectVRProperty(A=pRes[0], C=candidatesIds, V=votersIds))
        .map(lambda pRes: Profile.fromILPRes(pRes[0], pRes[1][1], candidatesIds, votersIds))
    )

    # Materialise the column names once so Row fields and schema cannot diverge.
    columnNames = tuple(getNumpyColumns(C, V))
    NPRow = Row(*columnNames)
    schema = StructType([StructField(n, FloatType(), False) for n in columnNames])

    # Cached so that the count below, the CSV write, and any use of the returned
    # RDD by the caller share a single evaluation of the pipeline above.
    vcrNCOPNumpyRows = (
        vcrNCOPProfilesRDD
        .map(lambda profile: profile.asNumpy().tolist())
        .map(lambda a: NPRow(*tuple(a)))
        .cache()
    )

    # The maps above are one-to-one, so this equals the number of NCOP profiles.
    statistics["NCOPVCR"] = vcrNCOPNumpyRows.count()
    print(statistics["NCOPVCR"])

    (
        spark.createDataFrame(vcrNCOPNumpyRows, schema)
        .write.format("com.databricks.spark.csv")
        .option("header", "true")
        .save(savePath)
    )

    # Use the real last path component instead of a fixed 6-character slice so
    # run names of any length map to a correctly named statistics file.
    runName = os.path.basename(os.path.normpath(savePath))

    (
        spark.createDataFrame(list(statistics.items()), ["key", "value"])
        .repartition(1)
        .write.format("com.databricks.spark.csv")
        .option("header", "true")
        .save("resources/" + runName + "-stats.csv")
    )

    return statistics, vcrNCOPNumpyRows

In [9]:
# Timed end-to-end run for 3 candidates and 3 voters.
# The measured interval covers everything run() does, including the Spark
# counts and both CSV writes; time() returns seconds since the epoch, so the
# printed difference is in seconds.
# NOTE(review): the output path is fixed; re-running may require a fresh path
# or removing the previous output first - confirm against the Spark save mode.
startTime = time()
stats, vcrNCOPProfiles = run('resources/3C3V-2', C=3, V=3)
endTime = time()
print(endTime - startTime)
print(stats)

ONE
506
0
0.8681762218475342
{'VCR': 506, 'NCOPVCR': 0}


In [8]:
# Timed end-to-end run for 4 candidates and 4 voters.
# Rebinds the notebook-level names startTime, stats, vcrNCOPProfiles and
# endTime, so results of any earlier run held in those names are replaced.
# The printed difference is the elapsed time of run() in seconds.
# NOTE(review): the output path is fixed; re-running may require a fresh path
# or removing the previous output first - confirm against the Spark save mode.
startTime = time()
stats, vcrNCOPProfiles = run('resources/4C4V-1', C=4, V=4)
endTime = time()
print(endTime - startTime)
print(stats)

ONE
57832
96
84.66908073425293
{'VCR': 57832, 'NCOPVCR': 96}
