In [None]:
import random
import re
import os
from datetime import date, timedelta
from itertools import tee
import pandas as pd
import datetime

import pymongo
from pymongo.collection import Collection

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, explode

##### Initialisation de MongoDB & Spark

In [None]:
client = pymongo.MongoClient("mongodb://127.0.0.1:27017/")
spark = SparkSession.builder.getOrCreate()

In [46]:
# client.drop_database('AviFest')

### Création de la base de donnée

In [None]:
db = client['AviFest']

### Price

In [None]:
prices = db['price']
prices.insert_many([
    {'type': 'performance', 'price': 10},
    {'type': 'indiscipline', 'price': 20},
    {'type': 'spectacle', 'price': 30},
    {'type': 'théâtre', 'price': 40},
    {'type': 'dance', 'price': 50},
    {'type': 'installation photographique', 'price': 60},
])

### Users

In [None]:
users = db['user']

In [None]:
def preprocessing(row):
    row['gender'] = row['gender'][0]
    firstname, lastname = row['name'].split(' ')
    row['firstname'] = firstname
    row['lastname'] = lastname
    
    del row['name']
    
    return row

In [None]:
persons = spark.read.json('json/data.json').withColumn("persons", explode(col("persons"))).select(
    "persons.name",
    "persons.adress",
    "persons.phone",
    "persons.email",
    "persons.age",
    "persons.gender",
)

df = persons.toPandas().apply(preprocessing, axis=1)

In [None]:
for index, row in df.iterrows():
    users.insert_one({
        'phone': row['phone'],
        'email': row['email'],
        'sexe': row['gender'],
        'age': row['age'],
        'firstname': row['firstname'],
        'lastname': row['lastname'],
    })

In [None]:
# for document in users.find():
#     print(document)

### Shows

In [None]:
shows = db['show']

In [None]:
df = spark.read.options(header='True', inferSchema='True', delimiter=',') \
  .csv("csv/show.csv") \
  .drop('id_location', 'id_show', 'artist')
df = df.toPandas()

In [None]:
locations = spark.read.json('json/locations.json', multiLine=True) \
    .withColumn("features", explode(col("features.geometry.coordinates"))) \
    .withColumn("lat", col('features').getItem(0).alias('lat')) \
    .withColumn("long", col('features').getItem(1).alias('long')) \
    .drop("type", "features") \
    .toPandas()

In [None]:
current = date(2022, 6, 1)
end = date(2022, 7, 1)
date_list = []

while current < end:
    for item in pd.date_range(current+pd.DateOffset(hours=10), current+pd.DateOffset(hours=24), freq='2H'):
        date_list.append(str(item))
    current += timedelta(days=1)

random.shuffle(date_list)

In [None]:
artists = spark.read.json('json/data.json').withColumn("artists", explode(col("artists"))).select(
    "artists.firstname",
    "artists.lastname",
)

artists = artists.toPandas().sample(frac=1).reset_index(drop=True)

In [None]:
for index, row in df.iterrows():
    lat, long = locations.loc[index % locations.shape[0]].values
    shows.insert_one({
        'title': row['nom'],
        'type': row['type'],
        'artist': ' '.join(artists.loc[index % artists.shape[0]].values),
        'nbPlace': row['nb_place'],
        'lat': lat,
        'long': long,
        'date' : date_list[index],
    })

In [None]:
# for document in shows.find():
#    print(document)

### Parkings

In [None]:
parkings = db['parking']

In [None]:
# Initialisation de 2 générateurs identiques
rows, get_nb_rows = tee(spark.read.option("multiline", "true").json('json/parkings.json').toPandas().iterrows())
nb_parkings = len(list((get_nb_rows)))

# Liste de booleans aléatoires avec 25% de False
list_pmr = list(map(lambda x: x < 0.75, [random.random() for _ in range(nb_parkings)]))

description: str
# Boucle sur 3 list différentes, les parkings et 2 listes aléatoires pour générer des booleans
# Avec Spark, je peux directement unpack le json, c'est bizarre mais ca marche
for (_, (((long, lat), _), (description, name), _)), pmr, bus in zip(rows, list_pmr, reversed(list_pmr)):
    
    try:
        slot = int(re.search(r'\*\*(\d+)\*\*', description).group(1))
        
        if not slot:
            slot = 'unknown'
    except: slot = 'unknown'
    
    parkings.insert_one({
        'type': 'voiture',
        'name': name,
        'nbslots': slot,
        'paying': not 'gratuit' in description.lower(),
        'busFestiv': bus,
        'pmr': pmr,
        'lat': lat,
        'long': long,
    })

### Réservation

In [None]:
reservations = db['reservation']

In [None]:
def get_random_id_from_collection(collection: Collection):
    return str(collection.aggregate([
                { '$sample': { 'size': 1 } },
                { "$project": {
                    "_id": 1,
                }}
            ]).next()['_id'])

In [47]:
for _ in range(20):
    reservations.insert_one({
        "id_person": get_random_id_from_collection(users),
        "id_show": get_random_id_from_collection(shows),
        'created_at': date(2022, 6, 1) + timedelta(days=random.randint(0, 29)),
        "nbreservation": random.randint(1, 5),
    })

StopIteration: 

In [None]:
# for document in reservations.find():
#    print(document)

Le choix multiple pour  la categorie

In [None]:
# artists = [document for document in shows.find({}, {'artist' : 1, '_id' : 0})]
# artists = list(set(map(lambda artists: artists["artist"], artists)))


# date_start = widgets.DatePicker(
#     description='Date début',
#     disabled=False,
#     value = datetime.date(2022,6,1)
# )
# date_end = widgets.DatePicker(
#     description='Date fin',
#     disabled=False,
#     value = datetime.date(2022,8,31)
# )
# hour_start = widgets.IntText(
#     value='0',
#     description='Heure début:',
#     disabled=False
# )
# hour_end = widgets.IntText(
#     value='0',
#     description='Heure fin:',
#     disabled=False
# )

# artiste = widgets.Dropdown(
#     options=artists,
#     value='Aaron Adams',
#     description='Artiste:',
#     disabled=False,
#     style={'description_width': 'initial'}
# )
# data = ["performance", "indiscipline", "spectacle", "théatre", "danse", "Installation photographique"]
# checkboxes = [widgets.Checkbox(value=False, description=label) for label in data]
# output1 = widgets.VBox(children=checkboxes)
# display(output1, artiste, widgets.HBox(children=[date_start, hour_start]), widgets.HBox(children=[date_end, hour_end]))

In [None]:
# for document in shows.find({'type' : { '$in' : [i.description for i in checkboxes if i.value == True]}, 
#                             'date' : {'$gte' : date_start.value.strftime("%Y-%m-%d ") + str(timedelta(hours=hour_start.value)), 
#                                       '$lt' : date_end.value.strftime("%Y-%m-%d ") + str(timedelta(hours=hour_end.value))},
#                             'artist' : artiste.value}):
#     print(document)