# Imports and start spark

In [None]:
from pyspark import pandas as ps
import re
import numpy as np
import os
#import pandas as pd

from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
from pyspark.sql.functions import concat, concat_ws, lit, col, trim, expr
from pyspark.sql.types import StructType, StructField, StringType,IntegerType

os.environ["PYARROW_IGNORE_TIMEZONE"]="1"

def get_spark_session(app_name: str, conf: SparkConf) -> SparkSession:
    """Create (or reuse) a local SparkSession set up for S3A access.

    Args:
        app_name: Application name given to the session builder.
        conf: Configuration to start from. It is modified in place: the master
            is set to ``local[*]`` and the settings below are applied to it.

    Returns:
        The session from ``SparkSession.builder...getOrCreate()``, with the
        log level of its SparkContext set to ERROR.
    """
    conf.setMaster('local[*]')

    settings = {
        'spark.driver.memory': '64g',
        # Only "spark.hadoop."-prefixed properties are copied into the Hadoop
        # configuration, so the S3A options need the prefix to take effect.
        # NOTE(review): credentials and endpoint are hard-coded here; consider
        # supplying them from the environment instead.
        "spark.hadoop.fs.s3a.access.key": "minio",
        "spark.hadoop.fs.s3a.secret.key": "minio123",
        "spark.hadoop.fs.s3a.endpoint": "http://192.168.1.127:9000",
        "spark.hadoop.fs.s3a.impl": "org.apache.hadoop.fs.s3a.S3AFileSystem",
        "spark.hadoop.fs.s3a.path.style.access": "true",
        "spark.sql.repl.eagerEval.enabled": "True",
        "spark.sql.adaptive.enabled": "True",
        "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
        "spark.sql.repl.eagerEval.maxNumRows": "10000",
    }
    for key, value in settings.items():
        conf.set(key, value)

    session = SparkSession.builder.appName(app_name).config(conf=conf).getOrCreate()
    # The log level is not a Spark property; it has to be set on the context.
    session.sparkContext.setLogLevel("ERROR")
    return session

spark = get_spark_session("Falk", SparkConf())

In [None]:
# Shuts down the Spark session created above. The cells further down still use
# `spark`, so run this only once the processing is finished.
spark.stop()

<a href='#f01'>f01</a>
<a href='#f02'>f02</a>
<a href='#f03'>f03</a>
<a href='#f04'>f04</a>
<a href='#f05'>f05</a>
<a href='#f06'>f06</a>
<a href='#f07'>f07</a>
<a href='#f08'>f08</a>
<a href='#f12'>f12</a>
<a href='#f13'>f13</a>
<a href='#f14'>f14</a>
<a href='#f15'>f15</a>
<a href='#f20'>f20</a>
<a href='#f21'>f21</a>
<a href='#f22'>f22</a>
<a href='#f23'>f23</a>
<a href='#f24'>f24</a>
<a href='#f25'>f25</a>
<a href='#f52'>f52</a>
<a href='#f65'>f65</a>
<a href='#f102'>f102</a>
<a href='#d1'>d1</a>
<a href='#d2'>d2</a>
<a href='#d3'>d3</a>
<a href='#d4'>d4</a>
<a href='#d5'>d5</a>
<a href='#d6'>d6</a>
<a href='#d7'>d7</a>
<a href='#d8'>d8</a>
<a href='#d9'>d9</a>
<a href='#d10'>d10</a>
<a href='#d11'>d11</a>
<a href='#d12'>d12</a>
<a href='#d13'>d13</a>
<a href='#d14'>d14</a>
<a href='#d15'>d15</a>
<a href='#d16'>d16</a>
<a href='#d17'>d17</a>
<a href='#d18'>d18</a>
<a href='#d101'>d101</a>
<a href='#d102'>d102</a>
<a href='#d103'>d103</a>

<a id='f01' />

# F01 PRI 

In [None]:
# Load the raw F01 notices; the cells below read `f01`, so it has to be defined here.
f01 = spark.read.json("falk/F01.json/*.json")

In [None]:
#f01.printSchema()

In [None]:
pf01 = f01.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F01_2014_ substrings and a trailing
# "_P" from every column name.
pf01 = pf01.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf01 = pf01.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf01 = pf01.rename(columns=lambda x: re.sub('F01_2014_', '', x))
pf01 = pf01.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf01.info()

In [None]:
# Convert each date column with ps.to_datetime, one column at a time.
for date_col in (
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "OBJECT_CONTRACT_DATE_PUBLICATION_NOTICE",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_AWARD_SCHEDULED",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
):
    pf01[date_col] = ps.to_datetime(pf01[date_col])

In [None]:
pf01.info()

In [None]:
#pf01.head()

In [None]:
f01 = pf01.to_spark()

In [None]:
#f01.printSchema()

In [None]:
#f01.write.parquet("s3a_//falk2210/f01.parquet")

In [None]:
#f01.write.parquet("s3a_//falk2210/pri.parquet")

In [None]:
# Write F01 as JSON to pri.json in the falk2210 bucket (s3a:// scheme).
f01.write.json("s3a://falk2210/pri.json")

In [None]:
# Write F01 as JSON to f01.json in the falk2210 bucket (s3a:// scheme).
f01.write.json("s3a://falk2210/f01.json")

<a id='f02' />

# F02 contract 

In [None]:
f02 = spark.read.json("falk/F02.json/*.json")
#f02.printSchema()

In [None]:
pf02 = f02.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F02_2014_ substrings from every column name.
pf02 = pf02.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf02 = pf02.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf02 = pf02.rename(columns=lambda x: re.sub('F02_2014_', '', x))
#pf02 = pf02.rename(columns=lambda x_ re.sub('_', '_', x))

In [None]:
#pf02.info()

In [None]:
pf02['OBJECT_CONTRACT_OBJECT_DESCR_EU_PROGR_RELATED'].update(pf02['OBJECT_CONTRACT_OBJECT_DESCR_EU_PROGR_RELATED_P'])
pf02['PROCEDURE_FRAMEWORK_JUSTIFICATION'].update(pf02['PROCEDURE_FRAMEWORK_JUSTIFICATION_P'])

In [None]:
#pf02.info()

In [None]:
pf02 = pf02.drop(columns=['OBJECT_CONTRACT_OBJECT_DESCR_EU_PROGR_RELATED_P', 'PROCEDURE_FRAMEWORK_JUSTIFICATION_P'])

In [None]:
#pf02.info()

In [None]:
# Drop a trailing "_P" from the remaining column names.
pf02 = pf02.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
pf02.info()

In [None]:
# Convert each date column with ps.to_datetime, one column at a time.
for date_col in (
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_DISPATCH_INVITATIONS",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
    "PROCEDURE_DATE_TENDER_VALID",
    "PROCEDURE_OPENING_CONDITION_DATE_OPENING_TENDERS",
):
    pf02[date_col] = ps.to_datetime(pf02[date_col])

In [None]:
#pf02.info()

In [None]:
#pf02.head()

In [None]:
f02 = pf02.to_spark()

In [None]:
#f02.write.parquet("s3a_//falk2210/f02.parquet")

In [None]:
#f02.write.parquet("s3a_//falk2210/contract.parquet")

In [None]:
f02.write.mode("append").json("falk2210/contract.json")

In [None]:
# Append F02 to contract.json in the falk2210 bucket (s3a:// scheme).
f02.write.mode("append").json("s3a://falk2210/contract.json")

In [None]:
# Append F02 to f02.json in the falk2210 bucket (s3a:// scheme).
f02.write.mode("append").json("s3a://falk2210/f02.json")

<a id='f03' />

# F03 AWARD 

In [None]:
f03 = spark.read.json("falk/F03.json/*.json")

In [None]:
f03.printSchema()

In [None]:
pf03 = f03.pandas_api()

In [None]:
pf03.info()

In [None]:
pf03 = pf03.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS:', '', x))
pf03 = pf03.rename(columns=lambda x: re.sub('FORM_SECTION:', '', x))
pf03 = pf03.rename(columns=lambda x: re.sub('F03_2014:', '', x))
#pf03 = pf03.rename(columns=lambda x: re.sub('_', '_', x))
pf03 = pf03.rename(columns=lambda x: re.sub('AWARD_CONTRACT:AWARDED_CONTRACT:', 'AWARDED_CONTRACT:', x))
#pf03.info()

In [None]:
pf03.info()

In [None]:
pf03['OBJECT_CONTRACT:OBJECT_DESCR:EU_PROGR_RELATED'].update(pf03.pop('OBJECT_CONTRACT:OBJECT_DESCR:EU_PROGR_RELATED:P'))
#pf03['AWARDED_CONTRACT_NB_TENDERS_RECEIVED'].update(pf03.pop('AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED'))
#pf03['AWARDED_CONTRACT_NB_TENDERS_RECEIVED_EMEANS'].update(pf03.pop('AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_EMEANS'))
#pf03['AWARDED_CONTRACT_NB_TENDERS_RECEIVED_NON_EU'].update(pf03.pop('AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_NON_EU'))
#pf03['AWARDED_CONTRACT_NB_TENDERS_RECEIVED_OTHER_EU'].update(pf03.pop('AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_OTHER_EU'))
#pf03['AWARDED_CONTRACT_NB_TENDERS_RECEIVED_SME'].update(pf03.pop('AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_SME'))
#pf03['AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY'].update(pf03.pop('AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_CURRENCY'))
#pf03['AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text'].update(pf03.pop('AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_text'))
#pf03['AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY'].update(pf03.pop('AWARDED_CONTRACT_VAL_RANGE_TOTAL_CURRENCY'))
#pf03['AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH'].update(pf03.pop('AWARDED_CONTRACT_VAL_RANGE_TOTAL_HIGH'))
#pf03['AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW'].update(pf03.pop('AWARDED_CONTRACT_VAL_RANGE_TOTAL_LOW'))
#pf03['AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY'].update(pf03.pop('AWARDED_CONTRACT_VAL_TOTAL_CURRENCY'))
#pf03['AWARDED_CONTRACT_VALUES_VAL_TOTAL_text'].update(pf03.pop('AWARDED_CONTRACT_VAL_TOTAL_text'))

In [None]:
#pf03.info()

In [None]:
# Drop a trailing ":P" from the column names (raw string avoids the invalid "\:" escape).
pf03 = pf03.rename(columns=lambda x: re.sub(r':P$', '', x))

In [None]:
pf03.info()

In [None]:
pf03.shape

In [None]:
#pf03["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf03["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
#pf03["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"] = ps.to_datetime(pf03["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"])

In [None]:
pf03.head(5)

In [None]:
f03 = pf03.to_spark()

In [None]:
f03.write.mode("append").json("falk2210/award.json")

In [None]:
f03

In [None]:
#f03.printSchema()

In [None]:
# Append F03 to f03.json in the falk2210 bucket (s3a:// scheme).
f03.write.mode("append").json("s3a://falk2210/f03.json")

In [None]:
# Append F03 to award.json in the falk2210 bucket (s3a:// scheme).
f03.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
#f03.write.parquet("s3a_//falk2210/award.parquet")

In [None]:
#f03.write.mode('append').parquet("s3a_//falk2210/f03.parquet")

# F04 PRI 

<a id='f04' />

In [None]:
f04 = spark.read.json("falk/F04.json/*.json")
#f04.printSchema()

In [None]:
pf04 = f04.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F04_2014_ substrings and a trailing
# "_P" from every column name.
pf04 = pf04.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf04 = pf04.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf04 = pf04.rename(columns=lambda x: re.sub('F04_2014_', '', x))
pf04 = pf04.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf04.info()

In [None]:
# Convert each date column with ps.to_datetime, one column at a time.
for date_col in (
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "OBJECT_CONTRACT_DATE_PUBLICATION_NOTICE",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_AWARD_SCHEDULED",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
):
    pf04[date_col] = ps.to_datetime(pf04[date_col])

In [None]:
#pf04.info()

In [None]:
f04 = pf04.to_spark()

In [None]:
#f04.write.parquet("s3a_//falk2210/f04.parquet")

In [None]:
#f04.write.mode('append').parquet("s3a_//falk2210/pri.parquet")

In [None]:
# Append F04 to f04.json in the falk2210 bucket (s3a:// scheme).
f04.write.mode('append').json("s3a://falk2210/f04.json")

In [None]:
# Append F04 to pri.json in the falk2210 bucket (s3a:// scheme).
f04.write.mode('append').json("s3a://falk2210/pri.json")

In [None]:
#pri = spark.read.json("s3a_//falk2210/pri.json")

In [None]:
#pri = spark.read.parquet("s3a_//falk2210/pri.parquet")

In [None]:
#pri.printSchema()

In [None]:
#p_pri = pri.to_pandas_on_spark()

In [None]:
#p_pri.head()

In [None]:
#p_pri.info()

# F05 contract 

<a id='f05' />

In [None]:
f05 = spark.read.json("falk/F05.json/*.json")
#f05.printSchema()

In [None]:
pf05 = f05.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F05_2014_ substrings from every column name.
pf05 = pf05.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf05 = pf05.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf05 = pf05.rename(columns=lambda x: re.sub('F05_2014_', '', x))
#pf05 = pf05.rename(columns=lambda x_ re.sub('_', '_', x))

In [None]:
#pf05.info()

In [None]:
pf05['PROCEDURE_FRAMEWORK_JUSTIFICATION'].update(pf05.pop('PROCEDURE_FRAMEWORK_JUSTIFICATION_P'))

In [None]:
#pf05.info()

In [None]:
# Drop a trailing "_P" from the remaining column names.
pf05 = pf05.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf05.info()

In [None]:
# Convert each date column with ps.to_datetime, one column at a time.
for date_col in (
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_DISPATCH_INVITATIONS",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
    "PROCEDURE_DATE_TENDER_VALID",
    "PROCEDURE_OPENING_CONDITION_DATE_OPENING_TENDERS",
):
    pf05[date_col] = ps.to_datetime(pf05[date_col])

In [None]:
#pf05.info()

In [None]:
#f05 = pf05.rename(columns=lambda x_ re.sub('', '', x))
#f05 = pf05.rename(columns=lambda x_ re.sub('', '', x))

In [None]:
f05 = pf05.to_spark()

In [None]:
#f05.write.mode('append').parquet("s3a_//falk2210/contract.parquet")

In [None]:
f05.write.mode("append").json("falk2210/contract.json")

In [None]:
# Append F05 to f05.json in the falk2210 bucket (s3a:// scheme).
f05.write.mode("append").json("s3a://falk2210/f05.json")

In [None]:
# Append F05 to contract.json in the falk2210 bucket (s3a:// scheme).
f05.write.mode("append").json("s3a://falk2210/contract.json")

In [None]:
#f05.write.parquet("s3a_//falk2210/f05.parquet")

In [None]:
#con = spark.read.json("s3a_//falk2210/contract.json")

In [None]:
#con.printSchema()

# F06 AWARD 

<a id='f06' />

In [None]:
f06 = spark.read.json("falk/F06.json/*.json")
#f06.printSchema()

In [None]:
pf06 = f06.to_pandas_on_spark()

In [None]:
#pf06.info()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F06_2014_ substrings and a trailing
# "_P", then shorten the AWARD_CONTRACT_AWARDED_CONTRACT_ and
# AWARDED_CONTRACT_TENDERS_ substrings to AWARDED_CONTRACT_.
pf06 = pf06.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf06 = pf06.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf06 = pf06.rename(columns=lambda x: re.sub('F06_2014_', '', x))
# Raw string: "\_" in a normal string literal is an invalid escape sequence.
pf06 = pf06.rename(columns=lambda x: re.sub(r'_P$', '', x))
pf06 = pf06.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT_', 'AWARDED_CONTRACT_', x))
pf06 = pf06.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_TENDERS_', 'AWARDED_CONTRACT_', x))


In [None]:
# (target, source) pairs, applied in order: each source column is popped from
# pf06 and passed to update() on its target column.
value_column_pairs = [
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text', 'AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_RANGE_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH', 'AWARDED_CONTRACT_VALUE_VAL_RANGE_TOTAL_HIGH'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW', 'AWARDED_CONTRACT_VALUE_VAL_RANGE_TOTAL_LOW'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_text', 'AWARDED_CONTRACT_VALUE_VAL_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_PUBLICATION', 'AWARDED_CONTRACT_VALUE_PUBLICATION'),
]
for target_col, source_col in value_column_pairs:
    pf06[target_col].update(pf06.pop(source_col))

In [None]:
#pf06.info()

In [None]:
#pf06[['AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_text', 'AWARDED_CONTRACT_VALUE_VAL_RANGE_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_RANGE_TOTAL_HIGH', 'AWARDED_CONTRACT_VALUE_VAL_RANGE_TOTAL_LOW', 'AWARDED_CONTRACT_VALUE_VAL_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_TOTAL_text', 'AWARDED_CONTRACT_VALUES_PUBLICATION', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_text']].head()

In [None]:
#pf06[['AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_text', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_text']]

In [None]:
#pf06["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf06["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
#pf06["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"] = ps.to_datetime(pf06["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"])
#pf06.info()

In [None]:
f06 = pf06.to_spark()

In [None]:
f06.write.mode("append").json("falk2210/award.json")

In [None]:
# Append F06 to award.json in the falk2210 bucket (s3a:// scheme).
f06.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
# Append F06 to f06.json in the falk2210 bucket (s3a:// scheme).
f06.write.mode("append").json("s3a://falk2210/f06.json")

In [None]:
#f06.write.mode('append').parquet("s3a_//falk2210/award.parquet")

In [None]:
#f06.write.mode('append').parquet("s3a_//falk2210/f06.parquet")

In [None]:
#award = spark.read.parquet("s3a_//falk2210/f03.parquet")

In [None]:
#award.withColumn("`COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_POSTAL_CODE'",award"`COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_POSTAL_CODE'".cast('String'))

In [None]:
#award.printSchema()

In [None]:
#award.show(truncate=False, vertical=True, n=1)

In [None]:
#award_ps = award.to_pandas_on_spark()

In [None]:
#award_ps

# F07 QSU 

<a id='f07' />

In [None]:
f07 = spark.read.json("falk/F07.json/*.json")
#f07.printSchema()

In [None]:
pf07 = f07.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F07_2014_ substrings and a trailing
# "_P" from every column name.
pf07 = pf07.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf07 = pf07.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf07 = pf07.rename(columns=lambda x: re.sub('F07_2014_', '', x))
pf07 = pf07.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf07.info()

In [None]:
# Convert each date column with ps.to_datetime, one column at a time.
for date_col in (
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
):
    pf07[date_col] = ps.to_datetime(pf07[date_col])
#pf07.info()

In [None]:
#pf07.head()

In [None]:
f07 = pf07.to_spark()

In [None]:
#f07.show(truncate=False, vertical=True)

In [None]:
#f07.write.parquet("s3a_//falk2210/f07.parquet")

In [None]:
#f07.write.mode('append').parquet("s3a_//falk2210/contract.parquet")

In [None]:
# Append F07 to f07.json in the falk2210 bucket (s3a:// scheme).
f07.write.mode("append").json("s3a://falk2210/f07.json")

In [None]:
# Append F07 to contract.json in the falk2210 bucket (s3a:// scheme).
f07.write.mode("append").json("s3a://falk2210/contract.json")

In [None]:
#con = spark.read.json("s3a_//falk2210/contract.json")
#con.printSchema()

In [None]:
#p_con = con.to_pandas_on_spark()

In [None]:
#p_con.head()

In [None]:
#p_con.info()

# F08 Buyer profile

<a id='f08' />

In [None]:
f08 = spark.read.json("falk/F08.json/*.json")
#f08.printSchema()

In [None]:
pf08 = f08.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F08_2014_ substrings and a trailing
# "_P" from every column name.
pf08 = pf08.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf08 = pf08.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf08 = pf08.rename(columns=lambda x: re.sub('F08_2014_', '', x))
pf08 = pf08.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf08.info()

In [None]:
pf08["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf08["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])

In [None]:
f08 = pf08.to_spark()

In [None]:
#f08.show(truncate=False, vertical=True)

In [None]:
#f08.write.parquet("s3a_//falk2210/f08.parquet")

In [None]:
# Append F08 to f08.json in the falk2210 bucket (s3a:// scheme).
f08.write.mode("append").json("s3a://falk2210/f08.json")

# F12 Contracts 

<a id='f12' />

In [None]:
f12 = spark.read.json("falk/F12.json/*.json")
#f12.printSchema()

In [None]:
pf12 = f12.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F12_2014_ substrings from every column name.
pf12 = pf12.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf12 = pf12.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf12 = pf12.rename(columns=lambda x: re.sub('F12_2014_', '', x))
#pf12 = pf12.rename(columns=lambda x_ re.sub('_', '_', x))

In [None]:
#pf12.info()

In [None]:
pf12['LEFTI_PARTICULAR_PROFESSION'].update(pf12.pop('LEFTI_PARTICULAR_PROFESSION_P'))

In [None]:
# Convert each date column with ps.to_datetime, one column at a time.
for date_col in (
    "PROCEDURE_DATE_DISPATCH_INVITATIONS",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
):
    pf12[date_col] = ps.to_datetime(pf12[date_col])
#pf12.info()

In [None]:
# Drop a trailing "_P" from the remaining column names.
pf12 = pf12.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf12.info()

In [None]:
f12 = pf12.to_spark()

In [None]:
f12.write.mode("append").json("falk2210/contract.json")

In [None]:
# Append F12 to f12.json in the falk2210 bucket (s3a:// scheme).
f12.write.mode("append").json("s3a://falk2210/f12.json")

In [None]:
# Append F12 to contract.json in the falk2210 bucket (s3a:// scheme).
f12.write.mode("append").json("s3a://falk2210/contract.json")

In [None]:
#f12.show(n=1, truncate=False, vertical=True)

In [None]:
#f12.write.mode('append').parquet("s3a_//falk2210/contract.parquet")

In [None]:
#f12.write.parquet("s3a_//falk2210/f12.parquet")

In [None]:
#f12.write.parquet("s3a_//falk2210/f12.parquet")

In [None]:
#con = spark.read.json("s3a_//falk2210/contract.json")

In [None]:
#con.printSchema()

# F13 Awards 

<a id='f13' />

In [None]:
f13 = spark.read.json("falk/F13.json/*.json")
#f13.printSchema()

In [None]:
pf13 = f13.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F13_2014_ substrings and a trailing
# "_P" from every column name.
pf13 = pf13.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf13 = pf13.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf13 = pf13.rename(columns=lambda x: re.sub('F13_2014_', '', x))
pf13 = pf13.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf13.info()

In [None]:
# Rename the RESULTS_/AWARDED_PRIZE_ substrings to contract-style names.
pf13 = pf13.rename(columns=lambda x: re.sub('RESULTS_', 'AWARDED_CONTRACT_', x))
pf13 = pf13.rename(columns=lambda x: re.sub('AWARDED_PRIZE_', 'AWARD_CONTRACT_', x))
# NOTE: the substitution above has already turned any 'NO_AWARDED_PRIZE_' into
# 'NO_AWARD_CONTRACT_', so this pattern no longer matches. It is kept as-is so
# the resulting names stay unchanged (a later cell rewrites
# 'AWARDED_CONTRACT_NO_AWARD_CONTRACT_').
pf13 = pf13.rename(columns=lambda x: re.sub('NO_AWARDED_PRIZE_', 'NO_AWARDED_CONTRACT_', x))
#pf13.info()

In [None]:
# Rename WINNERS_/WINNER<any char>/ADDRESS_WINNER_ substrings to their CONTRACTOR forms.
# The "." in the second pattern is a regex wildcard for the single character after WINNER.
pf13 = pf13.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_AWARD_CONTRACT_WINNERS_', 'AWARD_CONTRACT_AWARDED_CONTRACT_CONTRACTORS_', x))
pf13 = pf13.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_AWARD_CONTRACT_WINNER.', 'AWARD_CONTRACT_AWARDED_CONTRACT_CONTRACTOR_', x))
pf13 = pf13.rename(columns=lambda x: re.sub('ADDRESS_WINNER_', 'ADDRESS_CONTRACTOR_', x))
#pf13.info()

In [None]:
# Move the NO_AWARD_CONTRACT_ columns under the AWARD_CONTRACT_NO_AWARDED_CONTRACT_ prefix.
pf13 = pf13.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_NO_AWARD_CONTRACT_', 'AWARD_CONTRACT_NO_AWARDED_CONTRACT_', x))
# The next two substitutions repeat the WINNER -> CONTRACTOR renames from the
# previous cell; they change nothing if those have already run.
pf13 = pf13.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_AWARD_CONTRACT_WINNER.', 'AWARD_CONTRACT_AWARDED_CONTRACT_CONTRACTOR_', x))
pf13 = pf13.rename(columns=lambda x: re.sub('ADDRESS_WINNER_', 'ADDRESS_CONTRACTOR_', x))
#pf13.info()

In [None]:
# Swap the AWARDED_CONTRACT_AWARD_CONTRACT_ substring to AWARD_CONTRACT_AWARDED_CONTRACT_.
pf13 = pf13.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_AWARD_CONTRACT_', 'AWARD_CONTRACT_AWARDED_CONTRACT_', x))
#pf13.info()

In [None]:
# Rename any remaining WINNER_ substring to CONTRACTOR_.
pf13 = pf13.rename(columns=lambda x: re.sub('WINNER_', 'CONTRACTOR_', x))
#pf13.info()

In [None]:
# Shorten the AWARD_CONTRACT_AWARDED_CONTRACT_ substring to AWARDED_CONTRACT_.
pf13 = pf13.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT_', 'AWARDED_CONTRACT_', x))

In [None]:
# Rename the VAL_PRIZE currency/text columns to the VALUES_VAL_TOTAL names.
pf13 = pf13.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_PRIZE_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY', x))
pf13 = pf13.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_PRIZE_text', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_text', x))

In [None]:
pf13['AWARDED_CONTRACT_NB_PARTICIPANTS'].update(pf13.pop('AWARDED_CONTRACT_PARTICIPANTS_NB_PARTICIPANTS'))
pf13['AWARDED_CONTRACT_NB_PARTICIPANTS_OTHER_EU'].update(pf13.pop('AWARDED_CONTRACT_PARTICIPANTS_NB_PARTICIPANTS_OTHER_EU'))
pf13['AWARDED_CONTRACT_NB_PARTICIPANTS_SME'].update(pf13.pop('AWARDED_CONTRACT_PARTICIPANTS_NB_PARTICIPANTS_SME'))
#pf13['AWARDED_CONTRACT_PARTICIPANTS_NB_PARTICIPANTS_SME'].update(pf13.pop('AWARDED_CONTRACT_PARTICIPANTS_NB_PARTICIPANTS_SME'))
#pf13['AWARDED_CONTRACT_NB_TENDERS_RECEIVED_SME'].update(pf13.pop('AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_SME'))

In [None]:
#pf13.info()

In [None]:
#pf13["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf13["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
#pf13["AWARDED_CONTRACT_DATE_DECISION_JURY"] = ps.to_datetime(pf13["AWARDED_CONTRACT_DATE_DECISION_JURY"])
#pf13.info()

In [None]:
f13 = pf13.to_spark()

In [None]:
f13.write.mode("append").json("falk2210/award.json")

In [None]:
# Append F13 to f13.json in the falk2210 bucket (s3a:// scheme).
f13.write.mode("append").json("s3a://falk2210/f13.json")

In [None]:
# Append F13 to award.json in the falk2210 bucket (s3a:// scheme).
f13.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
#f13.write.parquet("s3a_//falk2210/f13.parquet")

In [None]:
#f13.write.mode('append').parquet("s3a_//falk2210/award.parquet")

# F14 Updates 

<a id='f14' />

In [None]:
f14 = spark.read.json("falk/F14.json/*.json")
#f14.printSchema()

In [None]:
pf14 = f14.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F14_2014_ substrings and a trailing
# "_P" from every column name.
pf14 = pf14.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf14 = pf14.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf14 = pf14.rename(columns=lambda x: re.sub('F14_2014_', '', x))
pf14 = pf14.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf14.info()

In [None]:
f14 = pf14.to_spark()

In [None]:
# Append F14 to f14.json in the falk2210 bucket (s3a:// scheme).
f14.write.mode("append").json("s3a://falk2210/f14.json")

In [None]:
# Append F14 to updates.json in the falk2210 bucket (s3a:// scheme).
f14.write.mode("append").json("s3a://falk2210/updates.json")

In [None]:
#f14.write.parquet("s3a_//falk2210/f14.parquet")

# F15 AWARD 

<a id='f15' />

In [None]:
f15 = spark.read.json("falk/F15.json/*.json")
#f15.printSchema()

In [None]:
pf15 = f15.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F15_2014_ substrings and a trailing
# "_P", then shorten AWARD_CONTRACT_AWARDED_CONTRACT_ to AWARDED_CONTRACT_.
pf15 = pf15.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf15 = pf15.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf15 = pf15.rename(columns=lambda x: re.sub('F15_2014_', '', x))
pf15 = pf15.rename(columns=lambda x: re.sub(r'_P$', '', x))
pf15 = pf15.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT_', 'AWARDED_CONTRACT_', x))

In [None]:
#pf15.info()

In [None]:
# (target, source) pairs, applied in order: each source column is popped from
# pf15 and passed to update() on its target column. Several targets receive
# two sources, so the order of the list matters.
value_column_pairs = [
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text', 'AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text', 'AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VAL_RANGE_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH', 'AWARDED_CONTRACT_VAL_RANGE_TOTAL_HIGH'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW', 'AWARDED_CONTRACT_VAL_RANGE_TOTAL_LOW'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_text', 'AWARDED_CONTRACT_VALUE_VAL_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_text', 'AWARDED_CONTRACT_VAL_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_PUBLICATION', 'AWARDED_CONTRACT_VALUE_PUBLICATION'),
]
for target_col, source_col in value_column_pairs:
    pf15[target_col].update(pf15.pop(source_col))

In [None]:
# Rename the two DIRECTIVE_2009_81_EC justification columns to the generic PROCEDURE_PT_* names.
pf15 = pf15.rename(columns=lambda x: re.sub('PROCEDURE_DIRECTIVE_2009_81_EC_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION', 'PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION', x))
pf15 = pf15.rename(columns=lambda x: re.sub('PROCEDURE_DIRECTIVE_2009_81_EC_PT_NEGOTIATED_WITHOUT_PUBLICATION_D_JUSTIFICATION', 'PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION', x))

In [None]:
# Pop each directive-specific justification column from pf15 and pass it to
# update() on one of the two generic PROCEDURE_PT_* columns.
#pf15['PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'].update(pf15.pop('PROCEDURE_DIRECTIVE_2009_81_EC_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'))
#pf15['PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION'].update(pf15.pop('PROCEDURE_DIRECTIVE_2009_81_EC_PT_NEGOTIATED_WITHOUT_PUBLICATION_D_JUSTIFICATION'))
pf15['PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'].update(pf15.pop('PROCEDURE_DIRECTIVE_2014_23_EU_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'))
# NOTE(review): the 2014_23 and 2014_25 *_AWARD_CONTRACT_WITHOUT_CALL_* sources
# go into the WITHOUT_CALL target, but this 2014_24 one goes into
# WITHOUT_PUBLICATION; confirm whether that is intended.
pf15['PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION'].update(pf15.pop('PROCEDURE_DIRECTIVE_2014_24_EU_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'))
pf15['PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'].update(pf15.pop('PROCEDURE_DIRECTIVE_2014_25_EU_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'))
pf15['PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION'].update(pf15.pop('PROCEDURE_DIRECTIVE_2014_24_EU_PT_NEGOTIATED_WITHOUT_PUBLICATION_D_JUSTIFICATION'))
#pf15['PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'].update(pf15.pop(''))
pf15['PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION'].update(pf15.pop('PROCEDURE_DIRECTIVE_2014_25_EU_PT_NEGOTIATED_WITHOUT_PUBLICATION_D_JUSTIFICATION'))

In [None]:
#pf15.info()

In [None]:
#pf15[['AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUE_VAL_ESTIMATED_TOTAL_text', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_text']]

In [None]:
#pf15["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf15["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
#pf15["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"] = ps.to_datetime(pf15["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"])
#pf15.info()

In [None]:
f15 = pf15.to_spark()

In [None]:
f15.write.mode("append").json("falk2210/award.json")

In [None]:
# Append F15 to f15.json in the falk2210 bucket (s3a:// scheme).
f15.write.mode("append").json("s3a://falk2210/f15.json")

In [None]:
# Append F15 to award.json in the falk2210 bucket (s3a:// scheme).
f15.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
#f15.write.parquet("s3a_//falk2210/f15.parquet")

In [None]:
#f15.write.mode('append').parquet("s3a_//falk2210/award.parquet")

# F20 AWARD MODIFICATIONS 

<a id='f20' />

In [None]:
f20 = spark.read.json("falk/F20.json/*.json")
#f20.printSchema()

In [None]:
pf20 = f20.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F20_2014_ substrings, then shorten
# AWARD_CONTRACT_AWARDED_CONTRACT_ to AWARDED_CONTRACT_.
pf20 = pf20.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf20 = pf20.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf20 = pf20.rename(columns=lambda x: re.sub('F20_2014_', '', x))
pf20 = pf20.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT_', 'AWARDED_CONTRACT_', x))
#pf20.info()

In [None]:
pf20['OBJECT_CONTRACT_OBJECT_DESCR_JUSTIFICATION'].update(pf20.pop('OBJECT_CONTRACT_OBJECT_DESCR_JUSTIFICATION_P'))
pf20['MODIFICATIONS_CONTRACT_DESCRIPTION_PROCUREMENT_JUSTIFICATION'].update(pf20.pop('MODIFICATIONS_CONTRACT_DESCRIPTION_PROCUREMENT_JUSTIFICATION_P'))

In [None]:
#pf20.info()

In [None]:
# Drop a trailing "_P" from the remaining column names.
pf20 = pf20.rename(columns=lambda x: re.sub(r'_P$', '', x))
#pf20.info()

In [None]:
# Convert each date column with ps.to_datetime, one column at a time.
for date_col in (
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "MODIFICATIONS_CONTRACT_DESCRIPTION_PROCUREMENT_DATE_END",
    "MODIFICATIONS_CONTRACT_DESCRIPTION_PROCUREMENT_DATE_START",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
):
    pf20[date_col] = ps.to_datetime(pf20[date_col])
#pf20.info()

In [None]:
#pf20.head()

In [None]:
f20 = pf20.to_spark()

In [None]:
# Append F20 to f20.json in the falk2210 bucket (s3a:// scheme).
f20.write.mode("append").json("s3a://falk2210/f20.json")

In [None]:
#f20.write.mode("append").json("s3a_//falk2210/award.json")

In [None]:
# Write F20 as Parquet to f20.parquet in the falk2210 bucket (s3a:// scheme).
f20.write.parquet("s3a://falk2210/f20.parquet")

# F21 AWARD_CONTRACT_NO_AWARDED_CONTRACT

<a id='f21' />

In [None]:
f21 = spark.read.json("falk/F21.json/*.json")
#f21.printSchema()

In [None]:
pf21 = f21.to_pandas_on_spark()

In [None]:
# Remove the DOFFIN_ESENDERS_/FORM_SECTION_/F21_2014_ substrings, then replace
# AWARD_CONTRACT_AWARDED_CONTRACT plus the following character with AWARDED_CONTRACT_.
pf21 = pf21.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf21 = pf21.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf21 = pf21.rename(columns=lambda x: re.sub('F21_2014_', '', x))
# The trailing "." is a regex wildcard for the one character after the prefix.
pf21 = pf21.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT.', 'AWARDED_CONTRACT_', x))
#pf21.info()

In [None]:
pf21['PROCEDURE_FRAMEWORK_JUSTIFICATION'].update(pf21.pop('PROCEDURE_FRAMEWORK_JUSTIFICATION_P'))

In [None]:
#pf21.info()

In [None]:
# Strip the trailing "_P" from any remaining column names (lambda syntax
# fixed; raw-string pattern avoids an invalid escape sequence).
pf21 = pf21.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
# Fold each AWARDED_CONTRACT_TENDERS_* count column into its
# AWARDED_CONTRACT_* counterpart; pop() removes the source column from pf21.
for _dst, _src in (
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_EMEANS',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_EMEANS'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_NON_EU',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_NON_EU'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_OTHER_EU',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_OTHER_EU'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_SME',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_SME'),
):
    pf21[_dst].update(pf21.pop(_src))

In [None]:
# Fold each AWARDED_CONTRACT_VAL_* column into its AWARDED_CONTRACT_VALUES_VAL_*
# counterpart; pop() removes the source column from pf21.
for _dst, _src in (
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text',
     'AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_RANGE_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH',
     'AWARDED_CONTRACT_VAL_RANGE_TOTAL_HIGH'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW',
     'AWARDED_CONTRACT_VAL_RANGE_TOTAL_LOW'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_text',
     'AWARDED_CONTRACT_VAL_TOTAL_text'),
):
    pf21[_dst].update(pf21.pop(_src))

In [None]:
#pf21.info()

In [None]:
"""
pf21["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf21["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
pf21["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"] = ps.to_datetime(pf21["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"])
pf21["OBJECT_CONTRACT_OBJECT_DESCR_DATE_END"] = ps.to_datetime(pf21["OBJECT_CONTRACT_OBJECT_DESCR_DATE_END"])
pf21["OBJECT_CONTRACT_OBJECT_DESCR_DATE_START"] = ps.to_datetime(pf21["OBJECT_CONTRACT_OBJECT_DESCR_DATE_START"])
pf21["PROCEDURE_DATE_AWARD_SCHEDULED"] = ps.to_datetime(pf21["PROCEDURE_DATE_AWARD_SCHEDULED"])
pf21["PROCEDURE_DATE_RECEIPT_TENDERS"] = ps.to_datetime(pf21["PROCEDURE_DATE_RECEIPT_TENDERS"])
"""

In [None]:
#pf21.info()

In [None]:
f21 = pf21.to_spark()

In [None]:
f21.write.mode("append").json("falk2210/award.json")

In [None]:
# Append the F21 rows as JSON to the falk2210 bucket ("s3a://" scheme fixed).
f21.write.mode("append").json("s3a://falk2210/f21.json")

In [None]:
# Append the F21 rows to the shared award JSON dataset in the falk2210 bucket
# ("s3a://" scheme fixed).
f21.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
#f21.write.parquet("s3a_//falk2210/f21.parquet")

In [None]:
#f21.write.mode("append").parquet("s3a_//falk2210/award.parquet")

# F22 AWARD_CONTRACT_NO_AWARDED_CONTRACT

<a id='f22' />

In [None]:
f22 = spark.read.json("falk/F22.json/*.json")
#f22.printSchema()

In [None]:
pf22 = f22.to_pandas_on_spark()

In [None]:
# Shorten the F22 column names: drop the nested-path prefixes, strip a
# trailing "_P", then collapse the award prefix.  Each lambda needs a ':'
# after its parameter to be valid Python.
pf22 = pf22.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf22 = pf22.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf22 = pf22.rename(columns=lambda x: re.sub('F22_2014_', '', x))
#pf22 = pf22.rename(columns=lambda x: re.sub('_', '_', x))
pf22 = pf22.rename(columns=lambda x: re.sub(r'_P$', '', x))
# The trailing '.' in the pattern matches any single character after the prefix.
pf22 = pf22.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT.', 'AWARDED_CONTRACT_', x))

In [None]:
#pf22.info()

In [None]:
# Fold the AWARDED_CONTRACT_TENDERS_* count columns into their
# AWARDED_CONTRACT_* counterparts; pop() removes the source column from pf22.
for _dst, _src in (
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_EMEANS',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_EMEANS'),
    # Disabled pairs:
    # ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_NON_EU', 'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_NON_EU'),
    # ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_OTHER_EU', 'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_OTHER_EU'),
    # ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_SME', 'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_SME'),
):
    pf22[_dst].update(pf22.pop(_src))

In [None]:
# Fold each AWARDED_CONTRACT_VAL_* column into its AWARDED_CONTRACT_VALUES_VAL_*
# counterpart; pop() removes the source column from pf22.
for _dst, _src in (
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text',
     'AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_RANGE_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH',
     'AWARDED_CONTRACT_VAL_RANGE_TOTAL_HIGH'),
    ('AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW',
     'AWARDED_CONTRACT_VAL_RANGE_TOTAL_LOW'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_text',
     'AWARDED_CONTRACT_VAL_TOTAL_text'),
):
    pf22[_dst].update(pf22.pop(_src))

In [None]:
#pf22.info()

In [None]:
"""
pf22["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"] = ps.to_datetime(pf22["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"])
pf22["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf22["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
pf22["OBJECT_CONTRACT_OBJECT_DESCR_DATE_END"] = ps.to_datetime(pf22["OBJECT_CONTRACT_OBJECT_DESCR_DATE_END"])
pf22["OBJECT_CONTRACT_OBJECT_DESCR_DATE_START"] = ps.to_datetime(pf22["OBJECT_CONTRACT_OBJECT_DESCR_DATE_START"])
pf22["PROCEDURE_DATE_RECEIPT_TENDERS"] = ps.to_datetime(pf22["PROCEDURE_DATE_RECEIPT_TENDERS"])
"""

In [None]:
#pf22.info()

In [None]:
f22 = pf22.to_spark()

In [None]:
f22.write.mode("append").json("falk2210/award.json")

In [None]:
# Append the F22 rows as JSON to the falk2210 bucket ("s3a://" scheme fixed).
f22.write.mode("append").json("s3a://falk2210/f22.json")

In [None]:
# Append the F22 rows to the shared award JSON dataset in the falk2210 bucket
# ("s3a://" scheme fixed).
f22.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
#f22.write.parquet("s3a_//falk2210/f22.parquet")

In [None]:
#f22.write.mode("append").parquet("s3a_//falk2210/award.parquet")

# F23 AWARD_CONTRACT_NO_AWARDED_CONTRACT

<a id='f23' />

In [None]:
f23 = spark.read.json("falk/F23.json/*.json")
#f23.printSchema()

In [None]:
pf23 = f23.to_pandas_on_spark()

In [None]:
# Shorten the F23 column names: drop the nested-path prefixes, strip a
# trailing "_P", then collapse the award prefix.  Each lambda needs a ':'
# after its parameter to be valid Python.
pf23 = pf23.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf23 = pf23.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf23 = pf23.rename(columns=lambda x: re.sub('F23_2014_', '', x))
#pf23 = pf23.rename(columns=lambda x: re.sub('_', '_', x))
pf23 = pf23.rename(columns=lambda x: re.sub(r'_P$', '', x))
pf23 = pf23.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT_', 'AWARDED_CONTRACT_', x))

In [None]:
# Fold the AWARDED_CONTRACT_TENDERS_* count columns into their
# AWARDED_CONTRACT_* counterparts; pop() removes the source column from pf23.
for _dst, _src in (
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_EMEANS',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_EMEANS'),
    # Disabled: ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_NON_EU', 'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_NON_EU'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_OTHER_EU',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_OTHER_EU'),
    # Disabled: ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_SME', 'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_SME'),
):
    pf23[_dst].update(pf23.pop(_src))

In [None]:
# Rename the two TENDERS_* count columns to the AWARDED_CONTRACT_NB_* names.
# Each lambda needs a ':' after its parameter to be valid Python.
pf23 = pf23.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_NON_EU', 'AWARDED_CONTRACT_NB_TENDERS_RECEIVED_NON_EU', x))
pf23 = pf23.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_SME', 'AWARDED_CONTRACT_NB_TENDERS_RECEIVED_SME', x))

In [None]:
# Fold the AWARDED_CONTRACT_VAL_TOTAL_* columns into their
# AWARDED_CONTRACT_VALUES_VAL_TOTAL_* counterparts; pop() removes the source.
for _dst, _src in (
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_text',
     'AWARDED_CONTRACT_VAL_TOTAL_text'),
):
    pf23[_dst].update(pf23.pop(_src))

In [None]:
# Rename the AWARDED_CONTRACT_VAL_* columns to the AWARDED_CONTRACT_VALUES_VAL_*
# names.  Each lambda needs a ':' after its parameter to be valid Python.
pf23 = pf23.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY', x))
pf23 = pf23.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_text', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text', x))
pf23 = pf23.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_PRICE_PAYMENT_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_PRICE_PAYMENT_CURRENCY', x))
pf23 = pf23.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_PRICE_PAYMENT_text', 'AWARDED_CONTRACT_VALUES_VAL_PRICE_PAYMENT_text', x))
pf23 = pf23.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_REVENUE_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_REVENUE_CURRENCY', x))
pf23 = pf23.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_REVENUE_text', 'AWARDED_CONTRACT_VALUES_VAL_REVENUE_text', x))

In [None]:
#pf23.info()

In [None]:
"""
pf23["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"] = ps.to_datetime(pf23["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"])
pf23["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf23["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
pf23["OBJECT_CONTRACT_OBJECT_DESCR_DATE_END"] = ps.to_datetime(pf23["OBJECT_CONTRACT_OBJECT_DESCR_DATE_END"])
pf23["OBJECT_CONTRACT_OBJECT_DESCR_DATE_START"] = ps.to_datetime(pf23["OBJECT_CONTRACT_OBJECT_DESCR_DATE_START"])
pf23["PROCEDURE_DATE_RECEIPT_TENDERS"] = ps.to_datetime(pf23["PROCEDURE_DATE_RECEIPT_TENDERS"])
"""

In [None]:
#pf23.info()

In [None]:
f23 = pf23.to_spark()

In [None]:
f23.write.mode("append").json("falk2210/award.json")

In [None]:
# Append the F23 rows as JSON to the falk2210 bucket ("s3a://" scheme fixed).
f23.write.mode("append").json("s3a://falk2210/f23.json")

In [None]:
# Append the F23 rows to the shared award JSON dataset in the falk2210 bucket
# ("s3a://" scheme fixed).
f23.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
#f23.write.parquet("s3a_//falk2210/f23.parquet")

In [None]:
#f23.write.mode("append").parquet("s3a_//falk2210/award.parquet")

# F24 Contract

<a id='f24' />

In [None]:
f24 = spark.read.json("falk/F24.json/*.json")
#f24.printSchema()

In [None]:
pf24 = f24.to_pandas_on_spark()

In [None]:
# Shorten the F24 column names: drop the nested-path prefixes and strip a
# trailing "_P".  Each lambda needs a ':' after its parameter to be valid
# Python; the "_P" pattern is a raw string to avoid an invalid escape.
pf24 = pf24.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf24 = pf24.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf24 = pf24.rename(columns=lambda x: re.sub('F24_2014_', '', x))
#pf24 = pf24.rename(columns=lambda x: re.sub('_', '_', x))
pf24 = pf24.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf24.info()

In [None]:
# Convert the date columns of pf24 to datetimes, one column at a time.
for _col in (
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
):
    pf24[_col] = ps.to_datetime(pf24[_col])

In [None]:
#pf24.info()

In [None]:
f24 = pf24.to_spark()

In [None]:
f24.write.mode("append").json("falk2210/contract.json")

In [None]:
#f24.show(n=1, truncate=False, vertical=True)

In [None]:
# Write the F24 rows as Parquet to the falk2210 bucket ("s3a://" scheme fixed).
f24.write.parquet("s3a://falk2210/f24.parquet")

In [None]:
# Append the F24 rows as JSON to the falk2210 bucket ("s3a://" scheme fixed).
f24.write.mode("append").json("s3a://falk2210/f24.json")

In [None]:
# Append the F24 rows to the shared contract JSON dataset in the falk2210
# bucket ("s3a://" scheme fixed).
f24.write.mode("append").json("s3a://falk2210/contract.json")

In [None]:
#f24.write.mode("append").parquet("s3a_//falk2210/contract.parquet")

# F25 award

<a id='f25' />

In [None]:
f25 = spark.read.json("falk/F25.json/*.json")
#f25.printSchema()

In [None]:
pf25 = f25.to_pandas_on_spark()

In [None]:
# Shorten the F25 column names: drop the nested-path prefixes, strip a
# trailing "_P", then collapse the award prefix.  Each lambda needs a ':'
# after its parameter to be valid Python.
pf25 = pf25.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf25 = pf25.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf25 = pf25.rename(columns=lambda x: re.sub('F25_2014_', '', x))
#pf25 = pf25.rename(columns=lambda x: re.sub('_', '_', x))
pf25 = pf25.rename(columns=lambda x: re.sub(r'_P$', '', x))
pf25 = pf25.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT_', 'AWARDED_CONTRACT_', x))

In [None]:
# Fold each AWARDED_CONTRACT_TENDERS_* count column into its
# AWARDED_CONTRACT_* counterpart; pop() removes the source column from pf25.
for _dst, _src in (
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_EMEANS',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_EMEANS'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_NON_EU',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_NON_EU'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_OTHER_EU',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_OTHER_EU'),
    ('AWARDED_CONTRACT_NB_TENDERS_RECEIVED_SME',
     'AWARDED_CONTRACT_TENDERS_NB_TENDERS_RECEIVED_SME'),
):
    pf25[_dst].update(pf25.pop(_src))

In [None]:
# Fold each AWARDED_CONTRACT_VAL_* column into its AWARDED_CONTRACT_VALUES_VAL_*
# counterpart; pop() removes the source column from pf25.
for _dst, _src in (
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text',
     'AWARDED_CONTRACT_VAL_ESTIMATED_TOTAL_text'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY',
     'AWARDED_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('AWARDED_CONTRACT_VALUES_VAL_TOTAL_text',
     'AWARDED_CONTRACT_VAL_TOTAL_text'),
):
    pf25[_dst].update(pf25.pop(_src))

In [None]:
#pf25.info()

In [None]:
"""
pf25["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf25["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
pf25["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"] = ps.to_datetime(pf25["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"])
pf25["OBJECT_CONTRACT_OBJECT_DESCR_DATE_END"] = ps.to_datetime(pf25["OBJECT_CONTRACT_OBJECT_DESCR_DATE_END"])
pf25["OBJECT_CONTRACT_OBJECT_DESCR_DATE_START"] = ps.to_datetime(pf25["OBJECT_CONTRACT_OBJECT_DESCR_DATE_START"])
"""

In [None]:
#pf25.info()

In [None]:
f25 = pf25.to_spark()

In [None]:
f25.write.mode("append").json("falk2210/award.json")

In [None]:
# Append the F25 rows as JSON to the falk2210 bucket ("s3a://" scheme fixed).
f25.write.mode("append").json("s3a://falk2210/f25.json")

In [None]:
# Append the F25 rows to the shared award JSON dataset in the falk2210 bucket
# ("s3a://" scheme fixed).
f25.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
#f25.write.parquet("s3a_//falk2210/f25.parquet")

In [None]:
#f25.write.mode("append").parquet("s3a_//falk2210/award.parquet")

# F52 Contract

<a id='f52' />

In [None]:
f52 = spark.read.json("falk/F52.json/*.json")
#f52.printSchema()

In [None]:
pf52 = f52.to_pandas_on_spark()

In [None]:
# Shorten the F52 column names: drop the nested-path prefixes and strip a
# trailing "_P".  Each lambda needs a ':' after its parameter to be valid
# Python; the "_P" pattern is a raw string to avoid an invalid escape.
pf52 = pf52.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf52 = pf52.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf52 = pf52.rename(columns=lambda x: re.sub('F52_2014_', '', x))
#pf52 = pf52.rename(columns=lambda x: re.sub('_', '_', x))
pf52 = pf52.rename(columns=lambda x: re.sub(r'_P$', '', x))
#pf52.info()

In [None]:
# Convert the date columns of pf52 to datetimes, one column at a time.
for _col in (
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_DISPATCH_INVITATIONS",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
):
    pf52[_col] = ps.to_datetime(pf52[_col])

In [None]:
#pf52.info()

In [None]:
f52 = pf52.to_spark()

In [None]:
f52.write.mode("append").json("falk2210/contract.json")

In [None]:
# Append the F52 rows as JSON to the falk2210 bucket ("s3a://" scheme fixed).
f52.write.mode("append").json("s3a://falk2210/f52.json")

In [None]:
# Append the F52 rows to the shared contract JSON dataset in the falk2210
# bucket ("s3a://" scheme fixed).
f52.write.mode("append").json("s3a://falk2210/contract.json")

In [None]:
#f52.write.parquet("s3a_//falk2210/f52.parquet")

In [None]:
#f52.write.mode('append').parquet("s3a_//falk2210/contract.parquet")

In [None]:
#con = spark.read.json("s3a_//falk2210/contract.json")
#con.printSchema()

In [None]:
#p_con = con.to_pandas_on_spark()

In [None]:
#p_con.head()

In [None]:
#p_con.info()

# F65 AWARD

<a id='f65' />

In [None]:
f65 = spark.read.json("falk/F65.json/*.json")
#f65.printSchema()

In [None]:
pf65 = f65.to_pandas_on_spark()

In [None]:
# Shorten the F65 column names: drop the nested-path prefixes, strip a
# trailing "_P", then collapse the award prefix.  Each lambda needs a ':'
# after its parameter to be valid Python.
pf65 = pf65.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf65 = pf65.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf65 = pf65.rename(columns=lambda x: re.sub('F65_2014_', '', x))
#pf65 = pf65.rename(columns=lambda x: re.sub('_', '_', x))
pf65 = pf65.rename(columns=lambda x: re.sub(r'_P$', '', x))
pf65 = pf65.rename(columns=lambda x: re.sub('AWARD_CONTRACT_AWARDED_CONTRACT_', 'AWARDED_CONTRACT_', x))

In [None]:
# Rename the AWARDED_CONTRACT_VAL_* columns to the AWARDED_CONTRACT_VALUES_VAL_*
# names.  Each lambda needs a ':' after its parameter to be valid Python.
pf65 = pf65.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_RANGE_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY', x))
pf65 = pf65.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_RANGE_TOTAL_HIGH', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH', x))
pf65 = pf65.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_RANGE_TOTAL_LOW', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW', x))
pf65 = pf65.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_TOTAL_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_CURRENCY', x))
pf65 = pf65.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_VAL_TOTAL_text', 'AWARDED_CONTRACT_VALUES_VAL_TOTAL_text', x))

In [None]:
#pf65.info()

In [None]:
"""
pf65["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"] = ps.to_datetime(pf65["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"])
pf65["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"] = ps.to_datetime(pf65["AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT"])
"""

In [None]:
#pf65.info()

In [None]:
f65 = pf65.to_spark()

In [None]:
f65.write.mode("append").json("falk2210/award.json")

In [None]:
# Append the F65 rows as JSON to the falk2210 bucket ("s3a://" scheme fixed).
f65.write.mode("append").json("s3a://falk2210/f65.json")

In [None]:
# Append the F65 rows to the shared award JSON dataset in the falk2210 bucket
# ("s3a://" scheme fixed).
f65.write.mode("append").json("s3a://falk2210/award.json")

In [None]:
#f65.write.parquet("s3a_//falk2210/f65.parquet")

In [None]:
#f65.write.mode('append').parquet("s3a_//falk2210/award.parquet")

# Read all awards

In [None]:
awa = spark.read.json("falk2210/award.json/*.json")

In [None]:
p_awa = awa.to_pandas_on_spark()

In [None]:
# Fold each "..._AC_AC_..." column into its "..._AC_..." counterpart; pop()
# removes the source column from p_awa.
for _dst, _src in (
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_COST_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_COST_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_PRICE_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_PRICE_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_QUALITY_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_QUALITY_AC_WEIGHTING'),
):
    p_awa[_dst].update(p_awa.pop(_src))

In [None]:
# Fold the directive-specific award-criteria columns (and the remaining
# "AC_AC_" columns) into the shared OBJECT_CONTRACT_OBJECT_DESCR_AC_* columns.
# Pairs are (target, source) and are applied in this order; pop() removes
# each source column from p_awa.
for _dst, _src in (
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_23_EU_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_AC_COST_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_AC_COST_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_PRICE_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_AC_PRICE_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_AC_QUALITY_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_AC_QUALITY_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_COST_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_COST_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_PRICE_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_PRICE_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_QUALITY_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_24_EU_AC_QUALITY_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_AC_COST_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_AC_COST_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_PRICE_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_AC_PRICE_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_AC_QUALITY_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_AC_QUALITY_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_PUBLICATION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_PUBLICATION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_PRICE_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_PRICE_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_QUALITY_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2014_25_EU_AC_QUALITY_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_COST'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_QUALITY'),
):
    p_awa[_dst].update(p_awa.pop(_src))

In [None]:
# Rename the DIRECTIVE_2009_81_EC "AC_AC_CRITERIA" columns to the shared
# AC_CRITERIA names, then fold the single-"AC" variants into them.
# Each lambda needs a ':' after its parameter to be valid Python.
p_awa = p_awa.rename(columns=lambda x: re.sub('OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2009_81_EC_AC_AC_CRITERIA_AC_CRITERION', 'OBJECT_CONTRACT_OBJECT_DESCR_AC_CRITERIA_AC_CRITERION', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2009_81_EC_AC_AC_CRITERIA_AC_WEIGHTING', 'OBJECT_CONTRACT_OBJECT_DESCR_AC_CRITERIA_AC_WEIGHTING', x))
p_awa['OBJECT_CONTRACT_OBJECT_DESCR_AC_CRITERIA_AC_CRITERION'].update(p_awa.pop('OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2009_81_EC_AC_CRITERIA_AC_CRITERION'))
p_awa['OBJECT_CONTRACT_OBJECT_DESCR_AC_CRITERIA_AC_WEIGHTING'].update(p_awa.pop('OBJECT_CONTRACT_OBJECT_DESCR_DIRECTIVE_2009_81_EC_AC_CRITERIA_AC_WEIGHTING'))

In [None]:
p_awa.info()

In [None]:
fp_awa = p_awa.to_spark()
fp_awa.write.mode("append").json("falk2210/all_award.json")

In [None]:
# Shorten the merged award column names by removing or rewriting section
# prefixes.  Each lambda needs a ':' after its parameter to be valid Python.
# NOTE(review): the order matters.  '_PROCUREMENT_DISCONTINUED' is removed and
# 'AWARD_CONTRACT_' is rewritten to 'award_' before the last three patterns
# run, so those three look unable to match anything — confirm whether they
# were meant to run earlier.
p_awa = p_awa.rename(columns=lambda x: re.sub('AWARDED_CONTRACT_', '', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('_PROCUREMENT_DISCONTINUED', '', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('COMPLEMENTARY_INFO_', '', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('CONTRACTING_BODY_', '', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('OBJECT_CONTRACT_', '', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('AWARD_CONTRACT_', 'award_', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('award_NO_PROCUREMENT_DISCONTINUED_', 'NO_award_', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION', 'AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION', x))
p_awa = p_awa.rename(columns=lambda x: re.sub('PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION', 'AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION', x))


In [None]:
#p_awa.info()

In [None]:
#p_awa.head()

In [None]:
#display(p_awa)

In [None]:
f_awa = p_awa.to_spark()

In [None]:
display(f_awa)

In [None]:
f_awa.distinct().count()

In [None]:
f_awa.count()

In [None]:
# Count the distinct rows whose contracting body is 'Mattilsynet'.
# NOTE(review): f_awa is built from p_awa after 'CONTRACTING_BODY_' was
# stripped from its column names, so this column may no longer exist — confirm.
f_awa.distinct().filter(f_awa["CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_OFFICIALNAME"] == 'Mattilsynet').count()

In [None]:
# Same count as the previous cell, but filtering before de-duplicating.
# NOTE(review): f_awa is built from p_awa after 'CONTRACTING_BODY_' was
# stripped from its column names, so this column may no longer exist — confirm.
f_awa.filter(f_awa["CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_OFFICIALNAME"] == 'Mattilsynet').distinct().count()

In [None]:
# Count all rows (duplicates included) whose contracting body is 'Ålesund kommune'.
# NOTE(review): f_awa is built from p_awa after 'CONTRACTING_BODY_' was
# stripped from its column names, so this column may no longer exist — confirm.
f_awa.filter(f_awa["CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_OFFICIALNAME"] == 'Ålesund kommune').count()

In [None]:
f_awa.createOrReplaceTempView("test")

In [None]:
#df3 = spark.sql("select distinct * from test where test[CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_OFFICIALNAME] == Mattilsynet")

In [None]:
# Print up to 19 distinct 'Mattilsynet' rows, one field per line, untruncated.
# NOTE(review): f_awa is built from p_awa after 'CONTRACTING_BODY_' was
# stripped from its column names, so this column may no longer exist — confirm.
f_awa.filter(f_awa['CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_OFFICIALNAME'] == "Mattilsynet").distinct().show(n=19, vertical=True, truncate=False) 

In [None]:
#f_awa.filter(f_awa['CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_OFFICIALNAME'] == "Mattilsynet").select('AWARDED_CONTRACT_CONTRACTORS_CONTRACTOR_ADDRESS_CONTRACTOR_OFFICIALNAME',"AWARDED_CONTRACT_CONTRACTOR_ADDRESS_CONTRACTOR_OFFICIALNAME", "AWARD_CONTRACT_TITLE", 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_OFFICIALNAME', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_CONTACT_POINT', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR').distinct().show(n=20, vertical=True, truncate=False)

In [None]:
# Print up to 5 distinct 'Mattilsynet' rows, one field per line, untruncated.
# NOTE(review): f_awa is built from p_awa after 'CONTRACTING_BODY_' was
# stripped from its column names, so this column may no longer exist — confirm.
f_awa.filter(f_awa["CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_OFFICIALNAME"] == 'Mattilsynet').distinct().show(n=5, vertical=True, truncate=False)

In [None]:
#f_awa.distinct().show(n=2, vertical=True, truncate=False)

In [None]:
# Print the distinct rows.  show must be called; without the parentheses the
# cell only evaluates to the bound method and prints no rows.
f_awa.distinct().show()

In [None]:
f_awa.write.mode("append").json("falk2210/f_awa.json")

In [None]:
#f_awa.printSchema()

# F102 Contract

<a id='f102' />

In [None]:
f102 = spark.read.json("falk/F102.json/*.json")
#f102.printSchema()

In [None]:
pf102 = f102.to_pandas_on_spark()

In [None]:
# Shorten the F102 column names: drop the nested-path prefixes and strip a
# trailing "_P".  Each lambda needs a ':' after its parameter to be valid
# Python; the "_P" pattern is a raw string to avoid an invalid escape.
pf102 = pf102.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pf102 = pf102.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
pf102 = pf102.rename(columns=lambda x: re.sub('F102_2014_', '', x))
#pf102 = pf102.rename(columns=lambda x: re.sub('_', '_', x))
pf102 = pf102.rename(columns=lambda x: re.sub(r'_P$', '', x))

In [None]:
#pf102.info()

In [None]:
# Convert the date columns of pf102 to datetimes, one column at a time.
for _col in (
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
):
    pf102[_col] = ps.to_datetime(pf102[_col])

In [None]:
#pf102.info()

In [None]:
f102 = pf102.to_spark()

In [None]:
f102.write.mode("append").json("falk2210/contract.json")

In [None]:
# Append the F102 rows as JSON to the falk2210 bucket ("s3a://" scheme fixed).
f102.write.mode("append").json("s3a://falk2210/f102.json")

In [None]:
# Append the F102 rows to the shared contract JSON dataset in the falk2210
# bucket ("s3a://" scheme fixed).
f102.write.mode("append").json("s3a://falk2210/contract.json")

In [None]:
#f102.write.parquet("s3a_//falk2210/f102.parquet")

In [None]:
#f102.write.mode('append').parquet("s3a_//falk2210/contract.parquet")

In [None]:
con = spark.read.json("falk2210/contract.json/*.json")
#con.printSchema()

In [None]:
p_con = con.to_pandas_on_spark()

In [None]:
# Fold each "..._AC_AC_..." column into its "..._AC_..." counterpart; pop()
# removes the source column from p_con.
for _dst, _src in (
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_COST_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_COST_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_COST_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_PRICE_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_PRICE_AC_WEIGHTING'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_CRITERION',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_QUALITY_AC_CRITERION'),
    ('OBJECT_CONTRACT_OBJECT_DESCR_AC_QUALITY_AC_WEIGHTING',
     'OBJECT_CONTRACT_OBJECT_DESCR_AC_AC_QUALITY_AC_WEIGHTING'),
):
    p_con[_dst].update(p_con.pop(_src))

In [None]:
#p_con.head(20)

In [None]:
p_con.info()

# Doffin forms
Only forms 14, 16, 17 and 18 are used after 2016.

<a id='d1' />

# 1 PRI 

In [None]:
d1 = spark.read.json("falk/1.json/*.json")
#d1.printSchema()

In [None]:
pd1 = d1.to_pandas_on_spark()

In [None]:
# Rename the DOFFIN_ESENDERS_VERSION prefix to DOFFIN_VERSION (the lambda
# needs a ':' after its parameter to be valid Python).
pd1 = pd1.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd1.info()

In [None]:
# Remove the DOFFIN_ESENDERS_ prefix from the column names (lambda syntax fixed).
pd1 = pd1.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

In [None]:
# Remove the FORM_SECTION_ prefix from the column names (lambda syntax fixed).
pd1 = pd1.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd1.info()

In [None]:
# Remove the PRIOR_INFORMATION_ prefix from the column names (lambda syntax fixed).
pd1 = pd1.rename(columns=lambda x: re.sub('PRIOR_INFORMATION_', '', x))
#pd1.info()

In [None]:
#pd1.info()

In [None]:
#pd1 = pd1.rename(columns=lambda x_ re.sub('_', '_', x))
#pd1.info()

In [None]:
# Merge each "_P" twin column into its base column: update() overwrites the
# base with the twin's non-null values, and pop() removes the twin from pd1.
for _dst, _src in (
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_ADDITIONAL_INFORMATION',
     'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_ADDITIONAL_INFORMATION_P'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS_F01_DIV_INTO_LOT_YES_LOT_LOT_DESCRIPTION',
     'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS_F01_DIV_INTO_LOT_YES_LOT_LOT_DESCRIPTION_P'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE',
     'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE_P'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_TITLE_CONTRACT',
     'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_TITLE_CONTRACT_P'),
    ('FD_OBJECT_WORKS_QUANTITY_SCOPE_WORKS_TOTAL_QUANTITY_OR_SCOPE',
     'FD_OBJECT_WORKS_QUANTITY_SCOPE_WORKS_TOTAL_QUANTITY_OR_SCOPE_P'),
    ('FD_OBJECT_WORKS_TITLE_CONTRACT',
     'FD_OBJECT_WORKS_TITLE_CONTRACT_P'),
    ('FD_OTH_INFO_ADDITIONAL_INFORMATION',
     'FD_OTH_INFO_ADDITIONAL_INFORMATION_P'),
):
    pd1[_dst].update(pd1.pop(_src))
#pd1.info()

In [None]:
# Strip the trailing "_P" from any remaining column names (lambda syntax
# fixed; raw-string pattern avoids an invalid escape sequence).
pd1 = pd1.rename(columns=lambda x: re.sub(r'_P$', '', x))
#pd1.info()

In [None]:
#pd1.head()

In [None]:
# Fold each FD_OBJECT_WORKS_* column into its FD_OBJECT_SUPPLIES_SERVICES_*
# counterpart: the works column is popped off the frame and its values are
# written into the supplies/services column with Series.update.
_supply = 'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_'
_works = 'FD_OBJECT_WORKS_'
_lot = 'F01_DIVISION_INTO_LOTS_F01_DIV_INTO_LOT_YES_LOT_'
_supply_lot = _supply + 'QUANTITY_SCOPE_' + _lot
_works_lot = _works + 'QUANTITY_SCOPE_WORKS_' + _lot

# (target, source) pairs, listed in the order the merges are executed.
_works_merges = [
    (_supply + _suffix, _works + _suffix)
    for _suffix in (
        'ADDITIONAL_INFORMATION',
        'CONTRACT_COVERED_GPA_VALUE',
        'CPV_CPV_ADDITIONAL_CPV_CODE_CODE',
        'CPV_CPV_MAIN_CPV_CODE_CODE',
        'FRAMEWORK_AGREEMENT_VALUE',
    )
]
# Cost columns: the works side carries an extra 'WORKS_' after QUANTITY_SCOPE_.
_works_merges += [
    (_supply + 'QUANTITY_SCOPE_' + _suffix, _works + 'QUANTITY_SCOPE_WORKS_' + _suffix)
    for _suffix in (
        'COSTS_RANGE_AND_CURRENCY_CURRENCY',
        'COSTS_RANGE_AND_CURRENCY_RANGE_VALUE_COST_HIGH_VALUE',
        'COSTS_RANGE_AND_CURRENCY_RANGE_VALUE_COST_LOW_VALUE',
        'COSTS_RANGE_AND_CURRENCY_VALUE_COST',
    )
]
# Per-lot descriptive and cost columns.
_works_merges += [
    (_supply_lot + _suffix, _works_lot + _suffix)
    for _suffix in (
        'ADDITIONAL_INFORMATION',
        'CPV_CPV_ADDITIONAL',
        'CPV_CPV_ADDITIONAL_CPV_CODE_CODE',
        'CPV_CPV_MAIN_CPV_CODE_CODE',
        'LOT_DESCRIPTION',
        'LOT_NUMBER',
        'LOT_TITLE',
        'NATURE_QUANTITY_SCOPE_COSTS_RANGE_AND_CURRENCY_CURRENCY',
        'NATURE_QUANTITY_SCOPE_COSTS_RANGE_AND_CURRENCY_RANGE_VALUE_COST_HIGH_VALUE',
        'NATURE_QUANTITY_SCOPE_COSTS_RANGE_AND_CURRENCY_RANGE_VALUE_COST_LOW_VALUE',
        'NATURE_QUANTITY_SCOPE_COSTS_RANGE_AND_CURRENCY_VALUE_COST',
        'NATURE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE',
    )
]
# Pairs whose two names do not share a common suffix.
_works_merges += [
    (_supply + 'TITLE_CONTRACT', _works + 'TITLE_CONTRACT'),
    (_supply + 'QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE',
     _works + 'QUANTITY_SCOPE_WORKS_TOTAL_QUANTITY_OR_SCOPE'),
    (_supply + 'TYPE_CONTRACT_PLACE_DELIVERY_SITE_OR_LOCATION_LABEL',
     _works + 'SITE_OR_LOCATION_LABEL'),
    (_supply + 'TYPE_CONTRACT_PLACE_DELIVERY_SITE_OR_LOCATION_NUTS_CODE',
     _works + 'SITE_OR_LOCATION_NUTS_CODE'),
]
# Per-lot scheduling columns.
_works_merges += [
    (_supply_lot + _suffix, _works_lot + _suffix)
    for _suffix in (
        'SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE_DAY',
        'SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE_MONTH',
        'SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE_YEAR',
        'SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE_DAY',
        'SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE_MONTH',
        'SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE_YEAR',
        'SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_MONTHS',
        'SCHEDULED_DATE_PERIOD_PROCEDURE_DATE_STARTING_DAY',
        'SCHEDULED_DATE_PERIOD_PROCEDURE_DATE_STARTING_MONTH',
        'SCHEDULED_DATE_PERIOD_PROCEDURE_DATE_STARTING_YEAR',
    )
]
# Notice-level procedure start date parts.
_works_merges += [
    (_supply + 'PROCEDURE_DATE_STARTING_' + _part,
     _works + 'SCHEDULED_DATE_PERIOD_PROCEDURE_DATE_STARTING_' + _part)
    for _part in ('DAY', 'MONTH', 'YEAR')
]

for _target, _source in _works_merges:
    pd1[_target].update(pd1.pop(_source))

In [None]:
#pd1.info()

In [None]:
# Shorten the column names by deleting these fragments wherever they occur.
# NOTE(review): later cells still look up columns by the unshortened
# 'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_...' / 'FD_OTH_...' names;
# confirm the intended run order of this cell against them.
pd1 = pd1.rename(columns=lambda x: re.sub('SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_', '', x))
pd1 = pd1.rename(columns=lambda x: re.sub('_OTH', '', x))
pd1 = pd1.rename(columns=lambda x: re.sub('F01_', '', x))
pd1 = pd1.rename(columns=lambda x: re.sub('_QUANTITY_SCOPE', '', x))


In [None]:
# Print the summary produced by pd1's .info() method to check the renamed columns.
pd1.info()

In [None]:
#pd1['datetime_test'] = ps.to_datetime(pd1[['FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE_YEAR', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE_MONTH', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE_DAY']])

In [None]:
# Convert the pandas-on-Spark frame back into a Spark DataFrame.
fd1 = pd1.to_spark()

In [None]:
# Build one DATE column per date that is stored as three separate columns
# named `<name>.YEAR`, `<name>.MONTH` and `<name>.DAY`, using Spark SQL's
# make_date(). The part columns are quoted with backticks in the expression
# because their names contain dots.
#
# Each name is listed once: the original cell computed three of these columns
# twice with identical expressions, which only replaced the column with itself.
# The FD_OBJECT_WORKS lot-level and FD_OBJECT_WORKS procedure-start variants
# were commented out in the original cell and are still not built here.
#
# NOTE(review): these names use '.' separators and the unshortened
# SUPPLIES_SERVICES prefix, unlike the names the earlier rename cells produce -
# confirm against fd1.printSchema().
_date_columns = [
    "FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_PROCEDURE_DATE_STARTING",
    "FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING",
    "FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OTH_INFO_NOTICE_DISPATCH_DATE",
]
for _name in _date_columns:
    fd1 = fd1.withColumn(
        _name,
        expr("make_date(`{0}.YEAR`, `{0}.MONTH`, `{0}.DAY`)".format(_name)),
    )

In [None]:
#fd1.printSchema()

In [None]:
# Switch back to the pandas-on-Spark API for the column drops and renames below.
# NOTE(review): newer PySpark releases deprecate to_pandas_on_spark() in favour
# of pandas_api() - confirm against the installed version.
pd1 = fd1.to_pandas_on_spark()

In [None]:
#pd1.info()

In [None]:
# Drop the `<name>.DAY`, `<name>.MONTH` and `<name>.YEAR` part columns of each
# date listed below.
_date_columns = [
    'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_PROCEDURE_DATE_STARTING',
    'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE',
    'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE',
    'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING',
    'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE',
    'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE',
    'FD_OTH_INFO_NOTICE_DISPATCH_DATE',
]
_date_part_columns = [
    _name + '.' + _part
    for _name in _date_columns
    for _part in ('DAY', 'MONTH', 'YEAR')
]
pd1 = pd1.drop(columns=_date_part_columns)

In [None]:
#pd1.info()

In [None]:
# Run each assembled date column through ps.to_datetime and store the result
# back under the same column name.
_date_columns = [
    "FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_PROCEDURE_DATE_STARTING",
    "FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING",
    "FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OTH_INFO_NOTICE_DISPATCH_DATE",
]
for _name in _date_columns:
    _converted = ps.to_datetime(pd1[_name])
    pd1[_name] = _converted

In [None]:
#pd1.info()

In [None]:
#pd1 = pd1.rename(columns=lambda x_ re.sub('', '', x))
#pd1 = pd1.rename(columns=lambda x_ re.sub('', '', x))

A Doffin notice can have one main CPV code; where the notice is divided into lots, each lot can have its own main CPV code.  
https://doffin.no/Notice/Details/2014-462423


In [None]:
# Map Doffin-style column names onto the COMPLEMENTARY_INFO / CONTRACTING_BODY /
# OBJECT_CONTRACT names listed below.
#
# Each entry is (pattern, replacement) and is applied with re.sub in the order
# given. The patterns are regular expressions, so every '.' inside a pattern
# matches any single character (an underscore as well as a literal dot).
_rename_rules = [
    ('FD_OTH_INFO_NOTICE_DISPATCH_DATE', 'COMPLEMENTARY_INFO.DATE_DISPATCH_NOTICE'),
    ('FD_AUTHORITY_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE.', 'CONTRACTING_BODY.ADDRESS_CONTRACTING_BODY.'),
    ('FD_AUTHORITY_NAME_ADDRESSES_CONTACT_FURTHER_INFORMATION.', 'CONTRACTING_BODY.ADDRESS_FURTHER_INFO.'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_ITEM', 'OBJECT_CONTRACT.ITEM'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_CPV.CPV_MAIN.CPV_CODE.CODE', 'OBJECT_CONTRACT.CPV_MAIN.CPV_CODE.CODE'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_LOT_NUMBER', 'OBJECT_CONTRACT.OBJECT_DESCR.LOT_NO'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_TITLE_CONTRACT', 'OBJECT_CONTRACT.TITLE'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_LOT_DESCRIPTION', 'OBJECT_CONTRACT.OBJECT_DESCR.SHORT_DESCR'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_LOT_TITLE', 'OBJECT_CONTRACT.OBJECT_DESCR.TITLE'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_TYPE_CONTRACT_PLACE_DELIVERY.SITE_OR_LOCATION.NUTS.CODE', 'OBJECT_CONTRACT.OBJECT_DESCR.NUTS.CODE'),
    ('FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_TYPE_CONTRACT_PLACE_DELIVERY.SITE_OR_LOCATION.LABEL', 'OBJECT_CONTRACT.OBJECT_DESCR.MAIN_SITE'),
    ('FD_CTYPE', 'OBJECT_CONTRACT.TYPE_CONTRACT.CTYPE'),
    ('FD_OTH_INFO_RELATES_TO_EU_PROJECT_YES', 'OBJECT_CONTRACT.OBJECT_DESCR.EU_PROGR_RELATED'),
]

# Targets the original cell listed without a source column (still unmapped):
#   'CONTRACTING_BODY.ADDRESS_PARTICIPATION.'
#   'OBJECT_CONTRACT.OBJECT_DESCR.ITEM'
#   'OBJECT_CONTRACT.SHORT_DESCR'
# Rule the original cell kept disabled:
#   ('FD_LEFTI_MAIN_FINANCING_CONDITIONS', 'LEFTI.MAIN_FINANCING_CONDITIONS')


def _apply_rename_rules(name: str) -> str:
    """Return *name* after applying every rule in _rename_rules, in order."""
    for _pattern, _replacement in _rename_rules:
        name = re.sub(_pattern, _replacement, name)
    return name


# Applying all rules to a name in one pass yields the same final names as
# running one full-frame rename per rule.
pd1 = pd1.rename(columns=_apply_rename_rules)

In [None]:
#pd1.info()

In [None]:
# Shorten whatever names were not mapped above by deleting these fragments
# wherever they occur.
pd1 = pd1.rename(columns=lambda x: re.sub('SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_', '', x))
pd1 = pd1.rename(columns=lambda x: re.sub('_OTH', '', x))
pd1 = pd1.rename(columns=lambda x: re.sub('F01_', '', x))
#pd1 = pd1.rename(columns=lambda x_ re.sub('', '', x))
#pd1 = pd1.rename(columns=lambda x_ re.sub('', '', x))
#pd1 = pd1.rename(columns=lambda x_ re.sub('', '', x))

In [None]:
#pd1.info()

In [None]:
# Display the first rows of pd1 (head() with its default row count).
pd1.head()

In [None]:
# Preview the contract title next to the notice-level and lot-level
# additional CPV code columns.
# NOTE(review): the two CPV names still carry the SUPPLIES_SERVICES prefix that
# the preceding rename cell removes - confirm they exist at this point.
_preview_columns = [
    'OBJECT_CONTRACT.TITLE',
    'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_CPV.CPV_ADDITIONAL.CPV_CODE.CODE',
    'FD_OBJECT_SUPPLIES_SERVICES_OBJECT_SUPPLY_SERVICE_QUANTITY_SCOPE_F01_DIVISION_INTO_LOTS.F01_DIV_INTO_LOT_YES.LOT_CPV.CPV_ADDITIONAL.CPV_CODE.CODE',
]
pd1[_preview_columns].head()

In [None]:
#pd1['FD_OTH_INFO_ADDITIONAL_INFORMATION'].isnull().head()

In [None]:
#pd1.head()

In [None]:
# Convert the cleaned pandas-on-Spark frame back into a Spark DataFrame.
fd1 = pd1.to_spark()

In [None]:
# Write fd1 as Parquet to the 'falk2210' bucket through the s3a connector.
# The URI scheme must be 's3a://'; the previous 's3a_//' is not a scheme, so
# the path would not have been routed to S3A.
# No save mode is set, so the writer's default mode applies.
fd1.write.parquet("s3a://falk2210/fd1_210921.parquet")

In [None]:
# Append fd1 as JSON to s3a://falk2210/fd1.json.
# The URI scheme must be 's3a://'; the previous 's3a_//' is not a scheme.
fd1.write.mode('append').json("s3a://falk2210/fd1.json")

In [None]:
# Append fd1 as JSON to s3a://falk2210/_pri.json.
# The URI scheme must be 's3a://'; the previous 's3a_//' is not a scheme.
fd1.write.mode('append').json("s3a://falk2210/_pri.json")

In [None]:
# Append fd1 as Parquet to s3a://falk2210/pri.parquet.
# The URI scheme must be 's3a://'; the previous 's3a_//' is not a scheme.
fd1.write.mode('append').parquet("s3a://falk2210/pri.parquet")

In [None]:
# Stop the Spark session. The cells below use `spark` again, so a new session
# has to be created before they are run.
spark.stop()

# 2 Contract 

<a id='d2' />

In [None]:
# Load every file matching falk/2.json/*.json as JSON into a Spark DataFrame.
d2 = spark.read.format("json").load("falk/2.json/*.json")
#d2.printSchema()

In [None]:
# Convert the Spark DataFrame into a pandas-on-Spark DataFrame so that the
# pandas-style rename / pop / update calls in the following cells can be used.
# NOTE(review): newer PySpark releases deprecate to_pandas_on_spark() in favour
# of pandas_api() - confirm against the installed version.
pd2 = d2.to_pandas_on_spark()

In [None]:
# Shorten the column names. 'DOFFIN_ESENDERS_VERSION' is mapped to
# 'DOFFIN_VERSION' first so that stripping the generic 'DOFFIN_ESENDERS_'
# fragment afterwards does not reduce it to plain 'VERSION'.
pd2 = pd2.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
pd2 = pd2.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))
pd2 = pd2.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
# NOTE(review): this removes every occurrence of 'CONTRACT_', yet the next cell
# looks up columns whose names still contain 'CONTRACT_' (for example
# 'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_...') - confirm against the schema.
pd2 = pd2.rename(columns=lambda x: re.sub('CONTRACT_', '', x))

In [None]:
# Columns that exist both as '<name>' and as '<name>_P'. For each of them the
# '_P' column is popped off the frame and its values are written into the
# base column with Series.update.
_p_merged_columns = (
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_DEPOSITS_GUARANTEES_REQUIRED',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_EXISTENCE_OTHER_PARTICULAR_CONDITIONS',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_LEGAL_FORM',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_MAIN_FINANCING_CONDITIONS',
    'FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_ECONOMIC_OPERATORS_PERSONAL_SITUATION',
    'FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_F02_ECONOMIC_FINANCIAL_CAPACITY_EAF_CAPACITY_INFORMATION',
    'FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_F02_ECONOMIC_FINANCIAL_CAPACITY_EAF_CAPACITY_MIN_LEVEL',
    'FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_T_CAPACITY_INFORMATION',
    'FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_T_CAPACITY_MIN_LEVEL',
    'FD_LEFTI_SERVICES_CONTRACTS_SPECIFIC_CONDITIONS_EXECUTION_SERVICE_RESERVED_PARTICULAR_PROFESSION',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_ADDITIONAL_INFORMATION_ABOUT_LOTS',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_LOT_DESCRIPTION',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_NATURE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_FRAMEWORK_JUSTIFICATION',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_FRAMEWORK_TOTAL_ESTIMATED_FREQUENCY_AWARDED_CONTRACTS',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_LOCATION_NUTS_LOCATION',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_SHORT_CONTRACT_DESCRIPTION',
    'FD_OBJECT_CONTRACT_INFORMATION_QUANTITY_SCOPE_NATURE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE',
    'FD_OBJECT_CONTRACT_INFORMATION_QUANTITY_SCOPE_OPTIONS_OPTION_DESCRIPTION',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_PLACE_OPENING_PLACE_NOT_STRUCTURED',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_FILE_REFERENCE_NUMBER',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_LANGUAGE_LANGUAGE_OTHER',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_TYPE_OF_PROCEDURE_MAXIMUM_NUMBER_INVITED_OPE_OBJECTIVE_CRITERIA',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_TYPE_OF_PROCEDURE_TYPE_OF_PROCEDURE_DETAIL_FOR_CONTRACT_NOTICE_F02_PT_ACCELERATED_NEGOTIATED_PTAN_JUSTIFICATION',
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_RECURRENT_PROCUREMENT',
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_ADDITIONAL_INFORMATION',
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION',
)
for _base in _p_merged_columns:
    # Look up the target first, then pop the '_P' source, then update.
    _target = pd2[_base]
    _target.update(pd2.pop(_base + '_P'))

In [None]:
# Merge the contact-data columns of the "lodging information for service"
# body into the matching columns of the "appeal procedure body responsible".
# Each source column is removed from the frame (pop) and passed to
# Series.update on the target column that carries the same field suffix.
_appeal_body_prefix = 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_'
_lodging_info_prefix = 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_PROCEDURES_FOR_APPEAL_LODGING_INFORMATION_FOR_SERVICE_CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_'

_contact_fields = (
    'ADDRESS',
    'COUNTRY_VALUE',
    'E_MAILS_E_MAIL',
    'FAX',
    'ORGANISATION_NATIONALID',
    'ORGANISATION_OFFICIALNAME',
    'PHONE',
    'POSTAL_CODE',
    'TOWN',
    'URL',
)

for _field in _contact_fields:
    pd2[_appeal_body_prefix + _field].update(pd2.pop(_lodging_info_prefix + _field))

In [None]:
#pd2['FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_ADDITIONAL_INFORMATION'].update(pd2.pop('FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_ADDITIONAL_INFORMATION_P'))
#pd2['FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION'].update(pd2.pop('FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION_P'))

In [None]:
#pd2.info()

In [None]:
def _strip_p_suffix(name: str) -> str:
    """Return *name* with a trailing "_P" removed (unchanged if absent)."""
    # Raw string: the original '\_P$' relied on an invalid escape sequence.
    return re.sub(r'_P$', '', name)


# A named function with a "-> str" annotation is used as the mapper, which is
# the form shown in the pandas-on-Spark rename documentation.
# NOTE(review): an un-annotated lambda may be rejected there - confirm.
pd2 = pd2.rename(columns=_strip_p_suffix)
#pd2.info()

In [None]:
# Long prefix shared by all per-lot (annex B) columns.
_F02_LOT_PREFIX = 'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_'

# Ordered (pattern, replacement) rules that shorten the verbose column names.
# Patterns are unanchored regular expressions, so a rule also rewrites longer
# names that merely contain the pattern. The order is significant and is the
# same as that of the former one-rename-per-rule statements.
_F02_RENAME_RULES = [
    ('DOFFIN_FORM_TYPE_NATIONAL_', ''),
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_PROCEDURES_FOR_APPEAL_MEDIATION_PROCEDURE_BODY_RESPONSIBLE_CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', 'COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_'),
    ('FD_CONTRACTING_AUTHORITY_INFORMATION_NAME_ADDRESSES_CONTACT_FURTHER_INFORMATION_', 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_'),
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_', 'COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_'),
    ('FD_CONTRACTING_AUTHORITY_INFORMATION_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_'),
    ('FD_CONTRACTING_AUTHORITY_INFORMATION_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'CONTRACTING_BODY_ADDRESS_PARTICIPATION_'),

    ('FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_F02_ECONOMIC_FINANCIAL_CAPACITY_EAF_CAPACITY_INFORMATION', 'LEFTI_ECONOMIC_FINANCIAL_INFO'),
    ('FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_F02_ECONOMIC_FINANCIAL_CAPACITY_EAF_CAPACITY_MIN_LEVEL', 'LEFTI_ECONOMIC_FINANCIAL_MIN_LEVEL'),
    ('FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_T_CAPACITY_INFORMATION', 'LEFTI_TECHNICAL_PROFESSIONAL_INFO'),
    ('FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_T_CAPACITY_MIN_LEVEL', 'LEFTI_TECHNICAL_PROFESSIONAL_MIN_LEVEL'),

    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_CPV_CPV_MAIN_CPV_CODE_CODE', 'OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_CPV_ADDITIONAL_CPV_CODE_CODE', 'OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL_CPV_SUPPLEMENTARY_CODE_CODE'),
    (_F02_LOT_PREFIX + 'CPV_CPV_ADDITIONAL', 'OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL'),
    # The rule above has already rewritten this prefix, so this rule can no
    # longer match; the resulting column name is the same either way.
    (_F02_LOT_PREFIX + 'CPV_CPV_ADDITIONAL_CPV_CODE_CODE', 'OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL_CPV_CODE_CODE'),
    (_F02_LOT_PREFIX + 'ITEM', 'OBJECT_CONTRACT_OBJECT_DESCR_ITEM'),
    (_F02_LOT_PREFIX + 'LOT_NUMBER', 'OBJECT_CONTRACT_OBJECT_DESCR_LOT_NO'),
    (_F02_LOT_PREFIX + 'LOT_DESCRIPTION', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR'),
    (_F02_LOT_PREFIX + 'LOT_TITLE', 'OBJECT_CONTRACT_OBJECT_DESCR_TITLE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_LOCATION_NUTS_NUTS_CODE', 'OBJECT_CONTRACT_OBJECT_DESCR_NUTS_CODE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_LOCATION_NUTS_LOCATION', 'OBJECT_CONTRACT_OBJECT_DESCR_LOCATION'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_TITLE_CONTRACT', 'OBJECT_CONTRACT_TITLE'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_PLACE_OPENING_', 'PROCEDURE_OPENING_CONDITION_'),
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_ADDITIONAL_INFORMATION', 'COMPLEMENTARY_INFO_INFO_ADD'),
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_RELATES_TO_EU_PROJECT_YES', 'OBJECT_CONTRACT_OBJECT_DESCR_EU_PROGR_RELATED'),
    ('FD_CONTRACTING_AUTHORITY_INFORMATION_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_INFORMATION', 'CONTRACTING_BODY_URL_DOCUMENT'),
    ('FD_CONTRACTING_AUTHORITY_INFORMATION_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_PARTICIPATE', 'CONTRACTING_BODY_URL_PARTICIPATION'),
    ('FD_CTYPE', 'OBJECT_CONTRACT_TYPE_CONTRACT_CTYPE'),
    ('FD_LEFTI_CONTRACT_RELATING_CONDITIONS_DEPOSITS_GUARANTEES_REQUIRED', 'LEFTI_DEPOSIT_GUARANTEE_REQUIRED'),
    ('FD_LEFTI_CONTRACT_RELATING_CONDITIONS_LEGAL_FORM', 'LEFTI_LEGAL_FORM'),
    ('FD_LEFTI_CONTRACT_RELATING_CONDITIONS_MAIN_FINANCING_CONDITIONS', 'LEFTI_MAIN_FINANCING_CONDITION'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_CONTRACT_COVERED_GPA_VALUE', 'PROCEDURE_CONTRACT_COVERED_GPA_CTYPE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_FRAMEWORK_JUSTIFICATION', 'PROCEDURE_FRAMEWORK_JUSTIFICATION'),
    ('ORGANISATION_NATIONALID', 'NATIONALID'),
    ('ORGANISATION_OFFICIALNAME', 'OFFICIALNAME'),
    (_F02_LOT_PREFIX + 'ADDITIONAL_INFORMATION_ABOUT_LOTS', 'OBJECT_CONTRACT_OBJECT_DESCR_INFO_ADD'),
]


def _shorten_f02_column(name: str) -> str:
    """Apply every rule in _F02_RENAME_RULES to *name*, in list order."""
    for pattern, replacement in _F02_RENAME_RULES:
        name = re.sub(pattern, replacement, name)
    return name


# One rename pass instead of one per rule: each column name still goes through
# the substitutions in the same order, so the final names are identical.
# The mapper is a named function annotated "-> str", the form shown in the
# pandas-on-Spark rename documentation.
pd2 = pd2.rename(columns=_shorten_f02_column)

 
    

In [None]:
#pd2.info()

In [None]:
# Switch from the pandas-on-Spark frame to a plain Spark DataFrame; the
# following cell builds date columns with Spark SQL expressions (expr).
fd2 = pd2.to_spark()

In [None]:
# Each entry is (new_date_column, source_prefix). The date is assembled by
# Spark SQL make_date from the three integer-like part columns
# "<source_prefix>_YEAR", "<source_prefix>_MONTH" and "<source_prefix>_DAY".
# Where only one name is needed the new column is called like the prefix.
_date_columns = [
    ("COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
     "FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_NOTICE_DISPATCH_DATE"),
    ("FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE",
     "FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE"),
    ("FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE",
     "FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE"),
    ("FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE",
     "FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE"),
    ("FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE",
     "FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE"),
    ("FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_DATE",
     "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_DATE_TIME"),
    ("FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_OBTAINING_SPECIFICATIONS_DATE",
     "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_OBTAINING_SPECIFICATIONS_TIME_LIMIT"),
    ("FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DISPATCH_INVITATIONS_DATE",
     "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DISPATCH_INVITATIONS_DATE"),
    ("FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_MINIMUM_TIME_MAINTAINING_TENDER_UNTIL_DATE",
     "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_MINIMUM_TIME_MAINTAINING_TENDER_UNTIL_DATE"),
    ("FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ",
     "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ"),
    ("FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_PREVIOUS_PUBLICATION_NOTICE_F2_DATE_OJ",
     "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_PREVIOUS_PUBLICATION_NOTICE_F2_DATE_OJ"),
    ("FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_RECEIPT_LIMIT_DATE",
     "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_RECEIPT_LIMIT_DATE"),
]

for _new_column, _parts_prefix in _date_columns:
    _make_date_sql = (
        "make_date(`" + _parts_prefix + "_YEAR`, `"
        + _parts_prefix + "_MONTH`, `"
        + _parts_prefix + "_DAY`)"
    )
    fd2 = fd2.withColumn(_new_column, expr(_make_date_sql))

In [None]:
# Back to a pandas-on-Spark frame so the pandas-style drop/rename calls in
# the following cells can be used.
# NOTE(review): newer Spark releases document DataFrame.pandas_api() as the
# replacement for to_pandas_on_spark() - confirm against the Spark version in use.
pd2 = fd2.to_pandas_on_spark()

In [None]:
# Remove the "<prefix>_DAY" / "<prefix>_MONTH" / "<prefix>_YEAR" part columns
# for the date prefixes listed below (first half of the date prefixes).
_date_part_prefixes_a = (
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_NOTICE_DISPATCH_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_DATE_TIME',
)

_obsolete_part_columns_a = [
    _prefix + '_' + _part
    for _prefix in _date_part_prefixes_a
    for _part in ('DAY', 'MONTH', 'YEAR')
]

pd2 = pd2.drop(columns=_obsolete_part_columns_a)
                        

In [None]:
# Remove the "<prefix>_DAY" / "<prefix>_MONTH" / "<prefix>_YEAR" part columns
# for the remaining date prefixes (second half of the date prefixes).
_date_part_prefixes_b = (
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_OBTAINING_SPECIFICATIONS_TIME_LIMIT',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DISPATCH_INVITATIONS_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_MINIMUM_TIME_MAINTAINING_TENDER_UNTIL_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_PREVIOUS_PUBLICATION_NOTICE_F2_DATE_OJ',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_RECEIPT_LIMIT_DATE',
)

_obsolete_part_columns_b = [
    _prefix + '_' + _part
    for _prefix in _date_part_prefixes_b
    for _part in ('DAY', 'MONTH', 'YEAR')
]

pd2 = pd2.drop(columns=_obsolete_part_columns_b)

In [None]:
# Ordered (pattern, replacement) rules giving the date columns their short
# names. Patterns are unanchored regular expressions; the order is the same as
# that of the former one-rename-per-rule statements.
_DATE_RENAME_RULES = [
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_DATE', 'PROCEDURE_OPENING_CONDITION_DATE_OPENING_TENDERS'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_RECEIPT_LIMIT_DATE', 'PROCEDURE_DATE_RECEIPT_TENDERS'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DISPATCH_INVITATIONS_DATE', 'PROCEDURE_DATE_DISPATCH_INVITATIONS'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_MINIMUM_TIME_MAINTAINING_TENDER_UNTIL_DATE', 'PROCEDURE_DATE_TENDER_VALID'),
    ('FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE', 'OBJECT_CONTRACT_OBJECT_DESCR_DATE_START'),
    ('FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE', 'OBJECT_CONTRACT_OBJECT_DESCR_DATE_END'),
]


def _rename_date_column(name: str) -> str:
    """Apply every rule in _DATE_RENAME_RULES to *name*, in list order."""
    for pattern, replacement in _DATE_RENAME_RULES:
        name = re.sub(pattern, replacement, name)
    return name


# Single rename pass; the mapper is a named function annotated "-> str", the
# form shown in the pandas-on-Spark rename documentation.
pd2 = pd2.rename(columns=_rename_date_column)

In [None]:
def _strip_contact_data_infix(name: str) -> str:
    """Remove every 'CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_' occurrence from *name*."""
    return re.sub('CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', '', name)


# Named mapper annotated "-> str", the form shown in the pandas-on-Spark
# rename documentation.
pd2 = pd2.rename(columns=_strip_contact_data_infix)

In [None]:
# Print the frame summary to inspect the column names at this point.
pd2.info()

In [None]:
# Name fragments removed from every column name, in this order. They are
# unanchored regular expressions, so each one is removed wherever it occurs
# in a name, not only at the start.
_GENERIC_FRAGMENTS = [
    'DOFFIN_FORM_TYPE_',
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_',
    'PROCEDURES_FOR_APPEAL_',
    'FD_CONTRACTING_AUTHORITY_INFORMATION_',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_',
    'FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_',
    'F02_',
    'FD_OBJECT_CONTRACT_INFORMATION_',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_',
    'ANNEX_B_',
]


def _strip_generic_fragments(name: str) -> str:
    """Remove each fragment in _GENERIC_FRAGMENTS from *name*, in list order."""
    for fragment in _GENERIC_FRAGMENTS:
        name = re.sub(fragment, '', name)
    return name


# Single rename pass; the mapper is a named function annotated "-> str", the
# form shown in the pandas-on-Spark rename documentation.
pd2 = pd2.rename(columns=_strip_generic_fragments)

In [None]:
# Convert the renamed pandas-on-Spark frame to a plain Spark DataFrame again.
fd2 = pd2.to_spark()

In [None]:
# Build one date column per base name with Spark SQL make_date from the part
# columns "<base>.YEAR", "<base>.MONTH" and "<base>.DAY"; the new column is
# named exactly like the base.
# All three part names are derived from the base, which fixes the entry for
# OTHER_PREVIOUS_PUBLICATION.DATE_OJ: its year argument used to be the
# truncated reference `.YEAR`.
# NOTE(review): these dot-separated names do not match the underscore-separated
# names used by the other cells - this cell looks like it targets an earlier
# naming scheme; confirm it is still meant to run.
_dotted_date_bases = [
    "FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE.NOTICE_DISPATCH_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION.DESCRIPTION_CONTRACT_INFORMATION.F02_DIVISION_INTO_LOTS.F02_DIV_INTO_LOT_YES.F02_ANNEX_B.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION.DESCRIPTION_CONTRACT_INFORMATION.F02_DIVISION_INTO_LOTS.F02_DIV_INTO_LOT_YES.F02_ANNEX_B.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.CONDITIONS_FOR_OPENING_TENDERS.DATE_TIME",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.CONDITIONS_OBTAINING_SPECIFICATIONS.TIME_LIMIT",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.DISPATCH_INVITATIONS_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.MINIMUM_TIME_MAINTAINING_TENDER.UNTIL_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2.PREVIOUS_PUBLICATION_EXISTS_F2.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2.PREVIOUS_PUBLICATION_EXISTS_F2.PREVIOUS_PUBLICATION_NOTICE_F2.DATE_OJ",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.RECEIPT_LIMIT_DATE",
]

for _base in _dotted_date_bases:
    # Backticks quote the full dotted name as a single column identifier.
    fd2 = fd2.withColumn(
        _base,
        expr(f"make_date(`{_base}.YEAR`, `{_base}.MONTH`, `{_base}.DAY`)"),
    )

In [None]:
# Print the schema of the fd2 Spark DataFrame for visual inspection.
fd2.printSchema()

In [None]:
# Switch from the Spark DataFrame API to the pandas-on-Spark API and show the
# resulting column summary.
pd2 = fd2.to_pandas_on_spark()

pd2.info()

In [None]:
# Remove the DAY / MONTH / YEAR component columns of every date field.
# One drop call per field, issued in the same order as the field list.
_date_fields = [
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE.NOTICE_DISPATCH_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION.DESCRIPTION_CONTRACT_INFORMATION.F02_DIVISION_INTO_LOTS.F02_DIV_INTO_LOT_YES.F02_ANNEX_B.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION.DESCRIPTION_CONTRACT_INFORMATION.F02_DIVISION_INTO_LOTS.F02_DIV_INTO_LOT_YES.F02_ANNEX_B.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.CONDITIONS_FOR_OPENING_TENDERS.DATE_TIME',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.CONDITIONS_OBTAINING_SPECIFICATIONS.TIME_LIMIT',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.DISPATCH_INVITATIONS_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.MINIMUM_TIME_MAINTAINING_TENDER.UNTIL_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2.PREVIOUS_PUBLICATION_EXISTS_F2.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2.PREVIOUS_PUBLICATION_EXISTS_F2.PREVIOUS_PUBLICATION_NOTICE_F2.DATE_OJ',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.RECEIPT_LIMIT_DATE',
]
for _field in _date_fields:
    pd2 = pd2.drop(columns=[_field + _part for _part in ('.DAY', '.MONTH', '.YEAR')])

In [None]:
# Show the column summary of pd2 again for visual inspection.
pd2.info()

In [None]:
# Convert every assembled date column to a datetime column, one field at a
# time and in the same order as the field list.
for _date_field in (
    "FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE.NOTICE_DISPATCH_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION.DESCRIPTION_CONTRACT_INFORMATION.F02_DIVISION_INTO_LOTS.F02_DIV_INTO_LOT_YES.F02_ANNEX_B.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION.DESCRIPTION_CONTRACT_INFORMATION.F02_DIVISION_INTO_LOTS.F02_DIV_INTO_LOT_YES.F02_ANNEX_B.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.CONDITIONS_FOR_OPENING_TENDERS.DATE_TIME",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.CONDITIONS_OBTAINING_SPECIFICATIONS.TIME_LIMIT",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.DISPATCH_INVITATIONS_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.MINIMUM_TIME_MAINTAINING_TENDER.UNTIL_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2.PREVIOUS_PUBLICATION_EXISTS_F2.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2.PREVIOUS_PUBLICATION_EXISTS_F2.PREVIOUS_PUBLICATION_NOTICE_F2.DATE_OJ",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE.RECEIPT_LIMIT_DATE",
):
    pd2[_date_field] = ps.to_datetime(pd2[_date_field])

In [None]:
# Show the column summary of pd2 for visual inspection.
pd2.info() 

In [None]:
# Reload the stored fd2 parquet files and expose them through the
# pandas-on-Spark API.
# Fix: the path started with "s3a_//", which is not a URI scheme, so it would
# not be resolved through the S3A filesystem configured for the session. The
# scheme separator is now "s3a://".
fd2 = spark.read.parquet("s3a://falk2210/fd2_210921.parquet/*.parquet")
pd2 = fd2.to_pandas_on_spark()

In [None]:
# Convert pd2 back to a Spark DataFrame and store it as parquet.
# Fix: the target path used "s3a_//" instead of the "s3a://" URI scheme.
fd2 = pd2.to_spark()


fd2.write.parquet("s3a://falk2210/fd2_240921.parquet")

In [None]:
# Convert pd2 back to a Spark DataFrame and store it as parquet.
# Fix: the target path used "s3a_//" instead of the "s3a://" URI scheme.
fd2 = pd2.to_spark()
#f102 = f102.drop("__index_level_0__")

fd2.write.parquet("s3a://falk2210/fd2_210921.parquet")

In [None]:
#fd2 = spark.read.parquet("s3a://falk2210/fd2_210921.parquet/*.parquet")

In [None]:
#fd2.printSchema()

In [None]:
# Stop the Spark session. Cells that use `spark` afterwards (e.g. the JSON read
# of the next section) need a session to be created again first.
spark.stop()

In [None]:
#pd2 = fd2.to_pandas_on_spark()

In [None]:
#pd2.info()

# 3 AWARD_OF_CONTRACT

<a id='d3' />

In [None]:
# Load all JSON part files under falk/3.json into the Spark DataFrame d3.
d3 = spark.read.json("falk/3.json/*.json")
#d3.printSchema()

In [None]:
# Continue with the pandas-on-Spark API for the column-name clean-up below.
pd3 = d3.to_pandas_on_spark()

In [None]:
# Replace 'DOFFIN_ESENDERS_VERSION' with 'DOFFIN_VERSION' wherever it occurs in
# a column name.
# Fix: the lambda was written `lambda x_ re.sub(...)`, which is a SyntaxError;
# the parameter list must end with a colon.
pd3 = pd3.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd3.info()

In [None]:
# Strip every 'DOFFIN_ESENDERS_' occurrence from the column names.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError (missing colon after `x`).
pd3 = pd3.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

In [None]:
# Strip every 'FORM_SECTION_' occurrence from the column names.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError (missing colon after `x`).
pd3 = pd3.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd3.info()

In [None]:
# Strip every 'CONTRACT_AWARD_' occurrence from the column names.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError (missing colon after `x`).
pd3 = pd3.rename(columns=lambda x: re.sub('CONTRACT_AWARD_', '', x))
#pd3.info()

In [None]:
# Fold each '<name>_P' column into its '<name>' counterpart: the '_P' column is
# popped from pd3 and passed to Series.update() on the base column.
# NOTE(review): several names below contain 'CONTRACT_AWARD_', which an earlier
# rename cell strips from all column names. Confirm the intended cell execution
# order.
for _base in (
    'FD_AWARD_OF_CONTRACT_CONTRACT_TITLE',
    'FD_AWARD_OF_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_ADDITIONAL_INFORMATION',
    'FD_COMPLEMENTARY_INFORMATION_ADDITIONAL_INFORMATION',
    'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_LOCATION_NUTS_LOCATION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_SHORT_CONTRACT_DESCRIPTION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_TITLE_CONTRACT',
    'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_FILE_REFERENCE_NUMBER',
    'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_TYPE_OF_PROCEDURE_DEF_F03_AWARD_WITHOUT_PRIOR_PUBLICATION_ANNEX_D_REASON_CONTRACT_LAWFUL',
):
    # Kept as a single expression so the base column is looked up before the
    # '_P' column is popped, exactly as in the unrolled form.
    pd3[_base].update(pd3.pop(_base + '_P'))

In [None]:
# Shorten the pd3 column names with an ordered list of regex substitutions.
# Each rule is applied to every column name through its own rename() call, in
# list order; patterns are unanchored unless they say otherwise.
# Fixes:
#   * `lambda x_ re.sub(...)` was a SyntaxError (missing colon after `x`).
#   * '\_P$' contained an invalid string escape; r'_P$' matches the same text.
# NOTE(review): the rule that strips
# 'ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_' runs
# before two rules whose patterns contain that same text, so those two later
# rules probably never match. Order is kept unchanged; confirm the intent.
_simplify_rules = [
    (r'_P$', ''),
    ('F03_', ''),
    ('_F03', ''),
    ('CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', ''),
    ('DOFFIN_FORM_TYPE_', ''),
    ('ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_', ''),
    ('ORGANISATION_', ''),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3_PREVIOUS_PUBLICATION_EXISTS_F3', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_PREVIOUS_PUBLICATION_EXISTS'),
    ('PREVIOUS_PUBLICATION_NOTICE_F3', 'PREVIOUS_PUBLICATION_NOTICE'),
    ('PREVIOUS_NOTICE_BUYER_PROFILE_F3', 'PREVIOUS_NOTICE_BUYER_PROFILE'),
    ('AUTHORITY_INFORMATION_', ''),
    ('E_MAILS_', ''),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_PREVIOUS_PUBLICATION_EXISTS_', 'FD_PREVIOUS_PUBLICATION_EXISTS_'),
]
for _pattern, _replacement in _simplify_rules:
    # Bind the current rule as lambda defaults so each mapper is independent of
    # the loop variables.
    pd3 = pd3.rename(columns=lambda x, _p=_pattern, _r=_replacement: re.sub(_p, _r, x))

In [None]:
# Fold the LODGING_INFORMATION_FOR_SERVICE_* contact columns into the matching
# APPEAL_PROCEDURE_BODY_RESPONSIBLE_* columns: each source column is popped from
# pd3 and passed to Series.update() on the target column.
_review_body = 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_'
_lodging_service = 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_INFORMATION_FOR_SERVICE_'
for _contact_field in (
    'ADDRESS',
    'COUNTRY_VALUE',
    'E_MAIL',
    'FAX',
    'NATIONALID',
    'OFFICIALNAME',
    'PHONE',
    'POSTAL_CODE',
    'TOWN',
    'URL',
):
    # Single expression: target lookup first, then pop, as in the unrolled form.
    pd3[_review_body + _contact_field].update(pd3.pop(_lodging_service + _contact_field))

In [None]:
# Map the F03 (award of contract) column names onto the target naming scheme.
# The rules are regex substitutions applied to every column name, one rename()
# call per rule, strictly in list order. Order matters: longer, more specific
# patterns come before the shorter prefixes that would otherwise swallow them.
# Fix: every mapper was written `lambda x_ re.sub(...)`, a SyntaxError (missing
# colon after `x`).
#
# Target names that have no source pattern yet (they were disabled rules with
# an empty pattern):
#   CONTRACTING_BODY_ADDRESS_FURTHER_INFO_
#   LEFTI_                          (LEFTI_PARTICULAR_PROFESSION_CTYPE)
#   OBJECT_CONTRACT_OBJECT_DESCR_   (OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL)
#   OBJECT_CONTRACT_OBJECT_DESCR_   (OBJECT_CONTRACT_OBJECT_DESCR_ITEM)
#   PROCEDURE_                      (PROCEDURE_DATE_RECEIPT_TENDERS)
#   CONTRACTING_BODY_URL_DOCUMENT
#   CONTRACTING_BODY_URL_TOOL
#   PROCEDURE_URL_NATIONAL_PROCEDURE
# Disabled rule kept for reference:
#   'FD_CONTRACTING_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_GENERAL'
#       -> 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_URL_GENERAL'
_target_schema_rules = [
    ('FD_AWARD_OF_CONTRACT_ECONOMIC_OPERATOR_NAME_ADDRESS_', 'AWARDED_CONTRACT_CONTRACTOR_ADDRESS_CONTRACTOR_'),
    ('FD_AWARD_OF_CONTRACT_', 'AWARD_CONTRACT_'),  # ITEM
    ('FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_MEDIATION_PROCEDURE_BODY_RESPONSIBLE_', 'COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_', 'COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE_', 'COMPLEMENTARY_INFO_'),  # COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'CONTRACTING_BODY_ADDRESS_PARTICIPATION_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_TYPE_OF_CONTRACTING_AUTHORITY_', 'CONTRACTING_BODY_'),  # CONTRACTING_BODY_CA_ACTIVITY_VALUE
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_CPV_', 'OBJECT_CONTRACT_'),  # OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_', 'OBJECT_CONTRACT_'),  # OBJECT_CONTRACT_TITLE
    ('OBJECT_CONTRACT_TITLE_CONTRACT', 'OBJECT_CONTRACT_OBJECT_DESCR_TITLE'),
    ('OBJECT_CONTRACT_SHORT_CONTRACT_DESCRIPTION', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_EXCLUDING_VAT_VALUE_CURRENCY', 'AWARDED_CONTRACT_VAL_SUBCONTRACTING_CURRENCY'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_EXCLUDING_VAT_VALUE_text', 'AWARDED_CONTRACT_VAL_SUBCONTRACTING_text'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'OBJECT_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_HIGH'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_LOW'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_VALUE_COST', 'OBJECT_CONTRACT_VAL_TOTAL_text'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_INITIAL_ESTIMATED_TOTAL_VALUE_CONTRACT_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_INITIAL_ESTIMATED_TOTAL_VALUE_CONTRACT_VALUE_COST', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_TYPE_OF_PROCEDURE_DEF_PT_NEGOTIATED_WITHOUT_COMPETITION_ANNEX_D_REASON_CONTRACT_LAWFUL', 'PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_TYPE_OF_PROCEDURE_DEF_AWARD_WITHOUT_PRIOR_PUBLICATION_ANNEX_D_REASON_CONTRACT_LAWFUL', 'PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_PARTICIPATE', 'CONTRACTING_BODY_URL_PARTICIPATION'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_', 'CONTRACTING_BODY_'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_AWARD_CRITERIA_CONTRACT_AWARD_NOTICE_INFORMATION_AWARD_CRITERIA_DETAIL_MOST_ECONOMICALLY_ADVANTAGEOUS_TENDER_SHORT_CRITERIA_DEFINITION_', 'PROCEDURE_'),
    ('AWARD_CONTRACT_CONTRACT_TITLE', 'AWARD_CONTRACT_TITLE'),
    ('AWARD_CONTRACT_LOT_NUMBER', 'AWARD_CONTRACT_LOT_NO'),
    ('FD_COMPLEMENTARY_INFORMATION_ADDITIONAL_INFORMATION', 'COMPLEMENTARY_INFO_INFO_ADD'),
    ('FD_COMPLEMENTARY_INFORMATION_RELATES_TO_EU_PROJECT_YES', 'OBJECT_CONTRACT_OBJECT_DESCR_EU_PROGR_RELATED'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_FILE_REFERENCE_NUMBER', 'OBJECT_CONTRACT_REFERENCE_NUMBER'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_', 'AWARD_CONTRACT_CONTRACT_LIKELY_SUB_CONTRACTED_'),
    # NOTE(review): this replacement has no trailing underscore although the
    # pattern does, so the following name part is glued on directly. Confirm.
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_TYPE_AND_ACTIVITIES_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES'),
    # NOTE(review): 'TYPE_OF_PROCEDUREAWARD...' / 'TYPE_OF_PROCEDUREPT...' have
    # no separator after PROCEDURE. Kept verbatim; confirm against the target
    # schema.
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_TYPE_OF_PROCEDURE_DEF_AWARD_WITHOUT_PRIOR_PUBLICATION_ANNEX_D_NO_OPEN_RESTRICTED_VALUE', 'FD_PROCEDURE_DEFINITION_TYPE_OF_PROCEDUREAWARD_WITHOUT_PRIOR_PUBLICATION_ANNEX_D_ANNEX_D1_NO_OPEN_RESTRICTED_VALUE'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_TYPE_OF_PROCEDURE_DEF_PT_NEGOTIATED_WITHOUT_COMPETITION_ANNEX_D_NO_OPEN_RESTRICTED_VALUE', 'FD_PROCEDURE_DEFINITION_TYPE_OF_PROCEDUREPT_NEGOTIATED_WITHOUT_COMPETITION_ANNEX_D_ANNEX_D1_NO_OPEN_RESTRICTED_VALUE'),
    ('OBJECT_CONTRACT_TYPE_CONTRACT_LOCATION_W_PUB_', 'OBJECT_CONTRACT_TYPE_CONTRACT_LOCATION_'),
    ('SERVICE_CATEGORY_PUB', 'SERVICE_CATEGORY'),
]
for _pattern, _replacement in _target_schema_rules:
    # Bind the current rule as lambda defaults so each mapper is independent of
    # the loop variables.
    pd3 = pd3.rename(columns=lambda x, _p=_pattern, _r=_replacement: re.sub(_p, _r, x))




In [None]:
# Show the column summary of pd3 to review the renamed columns.
pd3.info()

In [None]:
# Convert the pandas-on-Spark DataFrame back to a plain Spark DataFrame.
fd3 = pd3.to_spark()

In [None]:
"""
fd3 = fd3.withColumn("FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE", expr("make_date(`FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.YEAR`, `FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.MONTH`, `FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.DAY`)"))
fd3 = fd3.withColumn("FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY`)"))
fd3 = fd3.withColumn("FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ.DAY`)"))
fd3 = fd3.withColumn("FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.DAY`)"))
fd3 = fd3.withColumn("FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.DAY`)"))
fd3 = fd3.withColumn("FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ.DAY`)"))

In [None]:
#fd3.printSchema()

In [None]:
"""
pd3 = fd3.to_pandas_on_spark()

pd3.info()

In [None]:
"""
pd3 = pd3.drop(columns=['FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.DAY', 'FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.MONTH', 'FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.YEAR'])
pd3 = pd3.drop(columns=['FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR'])
pd3 = pd3.drop(columns=['FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ.YEAR'])
pd3 = pd3.drop(columns=['FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.YEAR'])
pd3 = pd3.drop(columns=['FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.YEAR'])
pd3 = pd3.drop(columns=['FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ.YEAR'])

In [None]:
#pd3.info()

In [None]:
"""
pd3["FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE"]= ps.to_datetime(pd3["FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE"])
pd3["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd3["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"])
pd3["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ"]= ps.to_datetime(pd3["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.CNT_NOTICE_INFORMATION.DATE_OJ"])
pd3["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ"]= ps.to_datetime(pd3["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.EX_ANTE_NOTICE_INFORMATION.DATE_OJ"])
pd3["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ"]= ps.to_datetime(pd3["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ"])
pd3["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ"]= ps.to_datetime(pd3["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3.PREVIOUS_PUBLICATION_EXISTS_F3.PREVIOUS_PUBLICATION_NOTICE_F3.DATE_OJ"])

In [None]:
#pd3.info() 

In [None]:
#pd3['DOFFIN_APPENDIX.DOFFIN_FORM_TYPE.NATIONAL.REFERENCE_SECTION.REFERENCE_DOFFIN.TYPE'].unique()

In [None]:
#fd3 = pd3.to_spark()

In [None]:
# Append fd3 as JSON under the relative path "falk2210/f_award.json".
# NOTE(review): the only nearby assignment of fd3 is commented out — confirm fd3 exists before running this cell.
fd3.write.mode("append").json("falk2210/f_award.json")

In [None]:
#fd3.write.parquet("s3a_//falk2210/fd3_210921.parquet")

In [None]:
"""
pd3.select("FD_AWARD_OF_CONTRACT_ECONOMIC_OPERATOR_NAME_ADDRESS_OFFICIALNAME", "FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_OFFICIALNAME") \
    .distinct() \
    .show(20)

In [None]:
"""
tabel1 = 'FORM'
filter1 = 3

test = ps.sql("SELECT * FROM {pd3} WHERE {tabel1} = {filter1}")

In [None]:
#pd3.head(5)s

In [None]:
#test.head()

# 4 PRI

<a id='d4' />

In [None]:
# Load every JSON file matching falk/4.json/*.json into the Spark DataFrame d4.
d4 = spark.read.json("falk/4.json/*.json")
#d4.printSchema()

In [None]:
# Expose d4 through the pandas-on-Spark API so the pandas-style column renames below can be used.
# NOTE(review): newer PySpark releases deprecate to_pandas_on_spark() in favour of pandas_api() — confirm against the installed version.
pd4 = d4.to_pandas_on_spark()

In [None]:
# Rewrite the 'DOFFIN_ESENDERS_VERSION' fragment to 'DOFFIN_VERSION' in every column name.
# (The lambda needs a ':' after its parameter; 'lambda x_ ...' does not parse.)
pd4 = pd4.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd4.info()

In [None]:
# Strip every remaining 'DOFFIN_ESENDERS_' fragment from the column names.
pd4 = pd4.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

In [None]:
# Strip the 'FORM_SECTION_' fragment from the column names.
pd4 = pd4.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd4.info()

In [None]:
# Strip the 'PERIODIC_INDICATIVE_UTILITIES_' fragment from the column names.
pd4 = pd4.rename(columns=lambda x: re.sub('PERIODIC_INDICATIVE_UTILITIES_', '', x))
#pd4.info()

In [None]:
#pd4 = pd4.rename(columns=lambda x_ re.sub('_', '_', x))
#pd4.info()

In [None]:
# Drop a trailing '_P' from column names. A raw string is used so the pattern carries
# no invalid '\_' string escape; '_' needs no escaping in a regular expression.
pd4 = pd4.rename(columns=lambda x: re.sub(r'_P$', '', x))
#pd4.info()

In [None]:
#pd4.info()

In [None]:
#pd4['DOFFIN_APPENDIX.DOFFIN_FORM_TYPE.NATIONAL.REFERENCE_SECTION.REFERENCE_DOFFIN.TYPE'].unique()

In [None]:
# Convert the renamed pandas-on-Spark frame back to a plain Spark DataFrame.
fd4 = pd4.to_spark()

In [None]:
"""
fd4 = fd4.withColumn("FD_COMPLEMENTARY_INFORMATION_PERIODIC_INDICATIVE.NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_PERIODIC_INDICATIVE.NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_PERIODIC_INDICATIVE.NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_PERIODIC_INDICATIVE.NOTICE_DISPATCH_DATE.DAY`)"))
fd4 = fd4.withColumn("FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_APPLICATION", expr("make_date(`FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_APPLICATION.YEAR`, `FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_APPLICATION.MONTH`, `FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_APPLICATION.DAY`)"))
fd4 = fd4.withColumn("FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_INTEREST", expr("make_date(`FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_INTEREST.YEAR`, `FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_INTEREST.MONTH`, `FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_INTEREST.DAY`)"))
fd4 = fd4.withColumn("FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE", expr("make_date(`FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.YEAR`, `FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.MONTH`, `FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.DAY`)"))
fd4 = fd4.withColumn("FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE", expr("make_date(`FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.YEAR`, `FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.MONTH`, `FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.DAY`)"))
fd4 = fd4.withColumn("FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING", expr("make_date(`FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.YEAR`, `FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.MONTH`, `FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.DAY`)"))
"""

In [None]:
#fd4.printSchema()

In [None]:
# Return to the pandas-on-Spark API for the column drops and datetime conversions that follow.
pd4 = fd4.to_pandas_on_spark()

#pd4.info()

In [None]:
# Remove the DAY / MONTH / YEAR component columns of each split date field.
# One drop call per field, so a field whose components are missing still fails on its own.
split_date_fields = [
    'FD_COMPLEMENTARY_INFORMATION_PERIODIC_INDICATIVE.NOTICE_DISPATCH_DATE',
    'FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_APPLICATION',
    'FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_INTEREST',
    'FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE',
    'FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE',
    'FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING',
]
for date_field in split_date_fields:
    pd4 = pd4.drop(columns=[f'{date_field}.{part}' for part in ('DAY', 'MONTH', 'YEAR')])

In [None]:
#pd4.info()

In [None]:
# Convert each assembled date column to a datetime dtype, in place on pd4.
# NOTE(review): the make_date cell that would create these columns is currently wrapped in a
# string literal — confirm the columns exist before running this cell.
assembled_date_columns = (
    "FD_COMPLEMENTARY_INFORMATION_PERIODIC_INDICATIVE.NOTICE_DISPATCH_DATE",
    "FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_APPLICATION",
    "FD_INTRODUCTION_PERIODIC_INDICATIVE.ANNEX_I.AI_PROCEDURE_PERIODIC_INDICATIVE.ADMINISTRATIVE_INFORMATION_DEF.DATE_LIMIT_RECEIPT_INTEREST",
    "FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OBJECT_CONTRACT_PERIODIC_INDICATIVE.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING",
)
for date_column in assembled_date_columns:
    pd4[date_column] = ps.to_datetime(pd4[date_column])

In [None]:
#pd4.info()

In [None]:
# Rebuild pd4 from the Spark frame fd4.
# NOTE(review): fd4 was derived from pd4 before the drop / to_datetime cells above, so when the
# cells run top to bottom this reassignment discards those changes — confirm that is intended.
pd4 = fd4.to_pandas_on_spark()


#pd4.info()

In [None]:
# Ordered (pattern, replacement) substitutions applied to every pd4 column name.
# Order matters: later patterns see the output of earlier ones (e.g. 'AI_OBJECT' is
# rewritten before the generic 'AI_' strip).
_PD4_FINAL_RENAMES = [
    ('_PERIODIC_INDICATIVE', ''),
    ('FD_INTRODUCTION_', ''),
    ('ANNEX_I', 'ANNEX'),
    ('F04_', ''),
    ('AI_OBJECT', 'OBJECT'),
    ('ANNEX_B_', ''),
    ('AI_', ''),
    # NOTE(review): identity mapping, has no effect — confirm the intended replacement.
    ('FD_AUTHORITY_', 'FD_AUTHORITY_'),
    ('ANNEX_OBJECT_CONTRACT_OBJECT_DESCRIPTION_DIVISION_INTO_LOTS', 'FD_OBJECT_QUANTITY_SCOPE_DIVISION_INTO_LOTS'),
]


def _rename_pd4_final(name) -> str:
    """Return *name* after applying every substitution in _PD4_FINAL_RENAMES, in order."""
    for pattern, replacement in _PD4_FINAL_RENAMES:
        name = re.sub(pattern, replacement, name)
    return name


# A single rename pass yields the same final names as one rename call per substitution.
pd4 = pd4.rename(columns=_rename_pd4_final)





#ANNEX_OBJECT_CONTRACT_OBJECT_DESCRIPTION_FRAMEWORK_TOTAL_ESTIMATED_COSTS_RANGE_AND_CURRENCY_CURRENCY FD_OBJECT_QUANTITY_SCOPE_DIVISION_INTO_LOTS_DIV_INTO_LOT_YES_LOT_NATURE_QUANTITY_SCOPE_COSTS_RANGE_AND_CURRENCY_CURRENCY


In [None]:
# Print a summary of pd4 (columns and dtypes) to inspect the renamed columns.
pd4.info()

In [None]:
#spark.stop()

In [None]:
# Convert pd4 to a Spark DataFrame so it can be written with the DataFrameWriter API.
fd4 = pd4.to_spark()

In [None]:
# Append fd4 as JSON to the S3A store configured on the Spark session.
# The URI scheme must be 's3a://'; 's3a_//' is not a scheme and is resolved as a relative path.
fd4.write.mode('append').json("s3a://falk2210/pri.json")

In [None]:
# Append the same frame to a second S3A location, fd4.json.
# The URI scheme must be 's3a://'; 's3a_//' is not a scheme and is resolved as a relative path.
fd4.write.mode('append').json("s3a://falk2210/fd4.json")

In [None]:
#fd4.write.parquet("s3a_//falk2210/fd4_210921.parquet")

In [None]:
# Read the PRI JSON output back from the S3A store into the Spark DataFrame pri.
# The URI scheme must be 's3a://'; 's3a_//' is not a scheme and is resolved as a relative path.
pri = spark.read.json("s3a://falk2210/pri.json")

In [None]:
# Show the schema Spark inferred for the data read into pri.
pri.printSchema()

# 5 CONTRACT 

<a id='d5' />

In [None]:
# Load every JSON file matching falk/5.json/*.json into the Spark DataFrame d5.
d5 = spark.read.json("falk/5.json/*.json")
#d5.printSchema()

In [None]:
# Expose d5 through the pandas-on-Spark API for the column merges and renames below.
pd5 = d5.to_pandas_on_spark()

In [None]:
# Ordered (pattern, replacement) substitutions that normalise the pd5 column-name prefixes.
# 'DOFFIN_ESENDERS_VERSION' must be rewritten before the generic 'DOFFIN_ESENDERS_' strip.
_PD5_PREFIX_RENAMES = [
    ('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION'),
    ('DOFFIN_ESENDERS_', ''),
    ('FORM_SECTION_', ''),
    ('CONTRACT_UTILITIES_', ''),
]


def _rename_pd5_prefixes(name) -> str:
    """Return *name* after applying every substitution in _PD5_PREFIX_RENAMES, in order."""
    for pattern, replacement in _PD5_PREFIX_RENAMES:
        name = re.sub(pattern, replacement, name)
    return name


# A single rename pass yields the same final names as one rename call per substitution.
pd5 = pd5.rename(columns=_rename_pd5_prefixes)

In [None]:
# For each base column, remove its '<base>_P' twin from pd5 and use the twin's values
# to update the base column in place.
columns_with_p_twin = [
    'FD_LEFTI_CONTRACT_NOTICE_UTILITIES_CONTRACT_RELATING_CONDITIONS_DEPOSITS_GUARANTEES_REQUIRED',
    'FD_LEFTI_CONTRACT_NOTICE_UTILITIES_CONTRACT_RELATING_CONDITIONS_EXISTENCE_OTHER_PARTICULAR_CONDITIONS',
    'FD_LEFTI_CONTRACT_NOTICE_UTILITIES_CONTRACT_RELATING_CONDITIONS_LEGAL_FORM',
    'FD_LEFTI_CONTRACT_NOTICE_UTILITIES_CONTRACT_RELATING_CONDITIONS_MAIN_FINANCING_CONDITIONS',
    'FD_LEFTI_CONTRACT_NOTICE_UTILITIES_F05_CONDITIONS_FOR_PARTICIPATION_ECONOMIC_OPERATORS_PERSONAL_SITUATION',
    'FD_LEFTI_CONTRACT_NOTICE_UTILITIES_F05_CONDITIONS_FOR_PARTICIPATION_F05_ECONOMIC_FINANCIAL_CAPACITY',
    'FD_LEFTI_CONTRACT_NOTICE_UTILITIES_SERVICES_CONTRACTS_SPECIFIC_CONDITIONS_EXECUTION_SERVICE_RESERVED_PARTICULAR_PROFESSION',
    'FD_COMPLEMENTARY_INFORMATION_ADDITIONAL_INFORMATION',
    'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_LOCATION_NUTS_LOCATION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_SHORT_CONTRACT_DESCRIPTION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_TITLE_CONTRACT',
    'FD_OBJECT_CONTRACT_INFORMATION_QUANTITY_SCOPE_NATURE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE',
    'FD_OBJECT_CONTRACT_INFORMATION_QUANTITY_SCOPE_OPTIONS_OPTION_DESCRIPTION',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_CONDITIONS_FOR_OPENING_TENDERS_PLACE_OPENING_PLACE_NOT_STRUCTURED',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_FILE_REFERENCE_NUMBER',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_LANGUAGE_LANGUAGE_OTHER',
    'FD_LEFTI_CONTRACT_NOTICE_UTILITIES_F05_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY',
]
for base_column in columns_with_p_twin:
    pd5[base_column].update(pd5.pop(f'{base_column}_P'))

In [None]:
# Drop a trailing '_P' from column names. A raw string is used so the pattern carries
# no invalid '\_' string escape; '_' needs no escaping in a regular expression.
pd5 = pd5.rename(columns=lambda x: re.sub(r'_P$', '', x))
#pd5.info()

In [None]:
# For each contact field, remove the LODGING_INFORMATION_FOR_SERVICE column from pd5 and use
# its values to update the matching APPEAL_PROCEDURE_BODY_RESPONSIBLE column in place.
appeal_body_prefix = 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_'
lodging_service_prefix = 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_INFORMATION_FOR_SERVICE_CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_'
contact_fields = (
    'ADDRESS',
    'COUNTRY_VALUE',
    'E_MAILS_E_MAIL',
    'FAX',
    'ORGANISATION_NATIONALID',
    'ORGANISATION_OFFICIALNAME',
    'PHONE',
    'POSTAL_CODE',
    'TOWN',
    'URL',
)
for contact_field in contact_fields:
    pd5[appeal_body_prefix + contact_field].update(pd5.pop(lodging_service_prefix + contact_field))

In [None]:
# Ordered (pattern, replacement) substitutions mapping the pd5 column names onto the
# shorter target names. Order matters: later patterns see the output of earlier ones.
_PD5_FIELD_RENAMES = [
    ('DOFFIN_FORM_TYPE_NATIONAL_', ''),
    ('FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_MEDIATION_PROCEDURE_BODY_RESPONSIBLE_CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', 'COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_'),
    ('FD_CONTRACTING_AUTHORITY_INFO_NAME_ADDRESSES_CONTACT_FURTHER_INFORMATION_CONTACT_DATA_', 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_'),
    ('FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', 'COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_'),
    ('FD_CONTRACTING_AUTHORITY_INFO_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_'),
    ('FD_CONTRACTING_AUTHORITY_INFO_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'CONTRACTING_BODY_ADDRESS_PARTICIPATION_'),
    ('FD_LEFTI_CONTRACT_NOTICE_UTILITIES_F05_CONDITIONS_FOR_PARTICIPATION_F05_ECONOMIC_FINANCIAL_CAPACITY', 'LEFTI_ECONOMIC_FINANCIAL_INFO'),
    # ('FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_F02_ECONOMIC_FINANCIAL_CAPACITY_EAF_CAPACITY_MIN_LEVEL', 'LEFTI_ECONOMIC_FINANCIAL_MIN_LEVEL'),
    ('FD_LEFTI_CONTRACT_NOTICE_UTILITIES_F05_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY', 'LEFTI_TECHNICAL_PROFESSIONAL_INFO'),
    # ('FD_LEFTI_F02_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_T_CAPACITY_MIN_LEVEL', 'LEFTI_TECHNICAL_PROFESSIONAL_MIN_LEVEL'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_CPV_CPV_MAIN_CPV_CODE_CODE', 'OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_CPV_CPV_ADDITIONAL_CPV_CODE_CODE', 'OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL_CPV_SUPPLEMENTARY_CODE_CODE'),
    # NOTE(review): this pattern ends in 'ADDITIONALL' (double L), which looks like a typo; a
    # corrected spelling would also match the prefix of the next pattern and pre-empt it —
    # confirm the intent before changing it.
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_F05_DIVISION_INTO_LOTS_F05_DIV_INTO_LOT_YES_F05_ANNEX_B_CPV_CPV_ADDITIONALL', 'OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_F05_DIVISION_INTO_LOTS_F05_DIV_INTO_LOT_YES_F05_ANNEX_B_CPV_CPV_ADDITIONAL_CPV_CODE_CODE', 'OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL_CPV_CODE_CODE'),
    # ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_ITEM', 'OBJECT_CONTRACT_OBJECT_DESCR_ITEM'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_F05_DIVISION_INTO_LOTS_F05_DIV_INTO_LOT_YES_F05_ANNEX_B_LOT_NUMBER', 'OBJECT_CONTRACT_OBJECT_DESCR_LOT_NO'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_F05_DIVISION_INTO_LOTS_F05_DIV_INTO_LOT_YES_F05_ANNEX_B_LOT_DESCRIPTION', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_F05_DIVISION_INTO_LOTS_F05_DIV_INTO_LOT_YES_F05_ANNEX_B_LOT_TITLE', 'OBJECT_CONTRACT_OBJECT_DESCR_TITLE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_LOCATION_NUTS_NUTS_CODE', 'OBJECT_CONTRACT_OBJECT_DESCR_NUTS_CODE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_LOCATION_NUTS_NUTS_LOCATION', 'OBJECT_CONTRACT_OBJECT_DESCR_LOCATION'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_TITLE_CONTRACT', 'OBJECT_CONTRACT_TITLE'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_CONDITIONS_FOR_OPENING_TENDERS_PLACE_OPENING_', 'PROCEDURE_OPENING_CONDITION_'),
    ('FD_COMPLEMENTARY_INFORMATION_ADDITIONAL_INFORMATION', 'COMPLEMENTARY_INFO_INFO_ADD'),
    ('FD_COMPLEMENTARY_INFORMATION_RELATES_TO_EU_PROJECT_YES', 'OBJECT_CONTRACT_OBJECT_DESCR_EU_PROGR_RELATED'),
    ('FD_CONTRACTING_AUTHORITY_INFO_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_INFORMATION', 'CONTRACTING_BODY_URL_DOCUMENT'),
    ('FD_CONTRACTING_AUTHORITY_INFO_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_PARTICIPATE', 'CONTRACTING_BODY_URL_PARTICIPATION'),
    ('FD_CTYPE', 'OBJECT_CONTRACT_TYPE_CONTRACT_CTYPE'),
    ('FD_LEFTI_CONTRACT_NOTICE_UTILITIES_CONTRACT_RELATING_CONDITIONS_DEPOSITS_GUARANTEES_REQUIRED', 'LEFTI_DEPOSIT_GUARANTEE_REQUIRED'),
    ('FD_LEFTI_CONTRACT_NOTICE_UTILITIES_CONTRACT_RELATING_CONDITIONS_LEGAL_FORM', 'LEFTI_LEGAL_FORM'),
    ('FD_LEFTI_CONTRACT_NOTICE_UTILITIES_CONTRACT_RELATING_CONDITIONS_MAIN_FINANCING_CONDITIONS', 'LEFTI_MAIN_FINANCING_CONDITION'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_CONTRACT_COVERED_GPA_VALUE', 'PROCEDURE_CONTRACT_COVERED_GPA_CTYPE'),
    # ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_FRAMEWORK_JUSTIFICATION', 'PROCEDURE_FRAMEWORK_JUSTIFICATION'),
    ('ORGANISATION_NATIONALID', 'NATIONALID'),
    ('ORGANISATION_OFFICIALNAME', 'OFFICIALNAME'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_F05_DIVISION_INTO_LOTS_F05_DIV_INTO_LOT_YES_F05_ANNEX_B_ADDITIONAL_INFORMATION_ABOUT_LOTS', 'OBJECT_CONTRACT_OBJECT_DESCR_INFO_ADD'),
]


def _rename_pd5_fields(name) -> str:
    """Return *name* after applying every substitution in _PD5_FIELD_RENAMES, in order."""
    for pattern, replacement in _PD5_FIELD_RENAMES:
        name = re.sub(pattern, replacement, name)
    return name


# A single rename pass yields the same final names as one rename call per substitution.
pd5 = pd5.rename(columns=_rename_pd5_fields)

In [None]:
# Ordered (pattern, replacement) substitutions that give the pd5 date columns the names
# read by the make_date cells further down.
_PD5_DATE_RENAMES = [
    ('FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE', 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_NOTICE_DISPATCH_DATE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_OBJECT_DESCRIPTION_F05_DIVISION_INTO_LOTS_F05_DIV_INTO_LOT_YES_F05_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE', 'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE'),
    # NOTE(review): identity mapping, has no effect — confirm the intended replacement.
    ('FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE', 'FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_CONDITIONS_FOR_OPENING_TENDERS_DATE_TIME', 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_DATE_TIME'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_CONDITIONS_FOR_MORE_INFORMATION_CONDITIONS_OBTAINING_SPECIFICATIONS', 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_OBTAINING_SPECIFICATIONS'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_MINIMUM_TIME_MAINTAINING_TENDER', 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_MINIMUM_TIME_MAINTAINING_TENDER'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F5_PREVIOUS_PUBLICATION_EXISTS_F5_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ', 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F5_PREVIOUS_PUBLICATION_EXISTS_F5_PREVIOUS_PUBLICATION_NOTICE_F5_DATE_OJ', 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_PREVIOUS_PUBLICATION_NOTICE_F2_DATE_OJ'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_UTILITIES_ADMINISTRATIVE_INFORMATION_RECEIPT_LIMIT_DATE', 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_RECEIPT_LIMIT_DATE'),
]


def _rename_pd5_dates(name) -> str:
    """Return *name* after applying every substitution in _PD5_DATE_RENAMES, in order."""
    for pattern, replacement in _PD5_DATE_RENAMES:
        name = re.sub(pattern, replacement, name)
    return name


# A single rename pass yields the same final names as one rename call per substitution.
pd5 = pd5.rename(columns=_rename_pd5_dates)

In [None]:
# Print a summary of pd5 (columns and dtypes) to inspect the renamed columns.
pd5.info()

In [None]:
# Convert pd5 to a Spark DataFrame so the make_date SQL expressions below can be applied.
fd5 = pd5.to_spark()

In [None]:
# Build COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE from the separate year / month / day columns.
dispatch_stem = "FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_NOTICE_DISPATCH_DATE"
fd5 = fd5.withColumn(
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    expr(f"make_date(`{dispatch_stem}_YEAR`, `{dispatch_stem}_MONTH`, `{dispatch_stem}_DAY`)"),
)

In [None]:
# Build the per-lot END_DATE column from its _YEAR / _MONTH / _DAY component columns.
lot_end_stem = "FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE"
fd5 = fd5.withColumn(
    lot_end_stem,
    expr(f"make_date(`{lot_end_stem}_YEAR`, `{lot_end_stem}_MONTH`, `{lot_end_stem}_DAY`)"),
)

In [None]:
# Build the per-lot START_DATE column from its _YEAR / _MONTH / _DAY component columns.
lot_start_stem = "FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE"
fd5 = fd5.withColumn(
    lot_start_stem,
    expr(f"make_date(`{lot_start_stem}_YEAR`, `{lot_start_stem}_MONTH`, `{lot_start_stem}_DAY`)"),
)

In [None]:
# Build the contract-period END_DATE column from its _YEAR / _MONTH / _DAY component columns.
period_end_stem = "FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE"
fd5 = fd5.withColumn(
    period_end_stem,
    expr(f"make_date(`{period_end_stem}_YEAR`, `{period_end_stem}_MONTH`, `{period_end_stem}_DAY`)"),
)

In [None]:
# Build the contract-period START_DATE column from its _YEAR / _MONTH / _DAY component columns.
period_start_stem = "FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE"
fd5 = fd5.withColumn(
    period_start_stem,
    expr(f"make_date(`{period_start_stem}_YEAR`, `{period_start_stem}_MONTH`, `{period_start_stem}_DAY`)"),
)

In [None]:
# Build the tender-opening ..._DATE column from the ..._DATE_TIME_YEAR / _MONTH / _DAY columns.
opening_date = "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_DATE"
fd5 = fd5.withColumn(
    opening_date,
    expr(f"make_date(`{opening_date}_TIME_YEAR`, `{opening_date}_TIME_MONTH`, `{opening_date}_TIME_DAY`)"),
)

In [None]:
# Build the ..._OBTAINING_SPECIFICATIONS_DATE column from the ..._TIME_LIMIT_YEAR / _MONTH / _DAY columns.
specs_stem = "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_OBTAINING_SPECIFICATIONS"
fd5 = fd5.withColumn(
    f"{specs_stem}_DATE",
    expr(f"make_date(`{specs_stem}_TIME_LIMIT_YEAR`, `{specs_stem}_TIME_LIMIT_MONTH`, `{specs_stem}_TIME_LIMIT_DAY`)"),
)

In [None]:
# NOTE: the equivalent make_date step for
# FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DISPATCH_INVITATIONS_DATE
# (from its _YEAR/_MONTH/_DAY parts) is currently disabled.

# Assemble the "tender must be maintained until" date from its
# _YEAR/_MONTH/_DAY part columns using Spark SQL make_date.
tender_valid_col = "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_MINIMUM_TIME_MAINTAINING_TENDER_UNTIL_DATE"
fd5 = fd5.withColumn(
    tender_valid_col,
    expr(f"make_date(`{tender_valid_col}_YEAR`, `{tender_valid_col}_MONTH`, `{tender_valid_col}_DAY`)"),
)

In [None]:
# Assemble the DATE_OJ of the "other previous publication" entry from its
# _YEAR/_MONTH/_DAY part columns using Spark SQL make_date.
other_pub_oj_col = "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ"
fd5 = fd5.withColumn(
    other_pub_oj_col,
    expr(f"make_date(`{other_pub_oj_col}_YEAR`, `{other_pub_oj_col}_MONTH`, `{other_pub_oj_col}_DAY`)"),
)

In [None]:
# Assemble the DATE_OJ of the previous publication notice (F2) from its
# _YEAR/_MONTH/_DAY part columns using Spark SQL make_date.
prev_notice_oj_col = "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_PREVIOUS_PUBLICATION_NOTICE_F2_DATE_OJ"
fd5 = fd5.withColumn(
    prev_notice_oj_col,
    expr(f"make_date(`{prev_notice_oj_col}_YEAR`, `{prev_notice_oj_col}_MONTH`, `{prev_notice_oj_col}_DAY`)"),
)

In [None]:
# Assemble the RECEIPT_LIMIT_DATE from its _YEAR/_MONTH/_DAY part columns
# using Spark SQL make_date.
receipt_limit_col = "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_RECEIPT_LIMIT_DATE"
fd5 = fd5.withColumn(
    receipt_limit_col,
    expr(f"make_date(`{receipt_limit_col}_YEAR`, `{receipt_limit_col}_MONTH`, `{receipt_limit_col}_DAY`)"),
)

In [None]:
# Print the schema of fd5 to inspect the columns after the make_date steps.
fd5.printSchema()

In [None]:
# Convert the Spark DataFrame to a pandas-on-Spark DataFrame so the following
# cells can use the pandas-style drop/rename API.
pd5 = fd5.to_pandas_on_spark()

#pd5.info()

In [None]:
# Drop the DAY/MONTH/YEAR part columns for the first group of date fields.
# Each stem below expands to <stem>_DAY, <stem>_MONTH, <stem>_YEAR.
date_part_stems = [
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_NOTICE_DISPATCH_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_F02_DIVISION_INTO_LOTS_F02_DIV_INTO_LOT_YES_F02_ANNEX_B_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_DATE_TIME',
]
pd5 = pd5.drop(
    columns=[f'{stem}_{part}' for stem in date_part_stems for part in ('DAY', 'MONTH', 'YEAR')]
)

In [None]:
# Drop the DAY/MONTH/YEAR part columns for the procedure/administrative date
# fields. Each stem expands to <stem>_DAY, <stem>_MONTH, <stem>_YEAR.
# NOTE(review): the DISPATCH_INVITATIONS_DATE parts are dropped here although
# the make_date step that would combine them is commented out - confirm that
# losing this date is intended.
admin_prefix = 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_'
admin_date_stems = [
    admin_prefix + tail
    for tail in (
        'CONDITIONS_OBTAINING_SPECIFICATIONS_TIME_LIMIT',
        'DISPATCH_INVITATIONS_DATE',
        'MINIMUM_TIME_MAINTAINING_TENDER_UNTIL_DATE',
        'PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ',
        'PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F2_PREVIOUS_PUBLICATION_EXISTS_F2_PREVIOUS_PUBLICATION_NOTICE_F2_DATE_OJ',
        'RECEIPT_LIMIT_DATE',
    )
]
pd5 = pd5.drop(
    columns=[f'{stem}_{part}' for stem in admin_date_stems for part in ('DAY', 'MONTH', 'YEAR')]
)

In [None]:
# Give the assembled date columns shorter names.
# Each (pattern, replacement) pair is applied with re.sub to every column
# label, one rename pass per pair, in the order listed.
# (The original cell had `lambda x_ re.sub(...)`, which is a SyntaxError.)
pd5_date_renames = (
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_CONDITIONS_FOR_OPENING_TENDERS_DATE', 'PROCEDURE_OPENING_CONDITION_DATE_OPENING_TENDERS'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_RECEIPT_LIMIT_DATE', 'PROCEDURE_DATE_RECEIPT_TENDERS'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DISPATCH_INVITATIONS_DATE', 'PROCEDURE_DATE_DISPATCH_INVITATIONS'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_MINIMUM_TIME_MAINTAINING_TENDER_UNTIL_DATE', 'PROCEDURE_DATE_TENDER_VALID'),
    ('FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE', 'OBJECT_CONTRACT_OBJECT_DESCR_DATE_START'),
    ('FD_OBJECT_CONTRACT_INFORMATION_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_END_DATE', 'OBJECT_CONTRACT_OBJECT_DESCR_DATE_END'),
)
for old_fragment, new_fragment in pd5_date_renames:
    # Default arguments pin the current pair to this lambda.
    pd5 = pd5.rename(columns=lambda x, old=old_fragment, new=new_fragment: re.sub(old, new, x))

In [None]:
# Print a summary of pd5 to check the column names after renaming.
pd5.info()

In [None]:
# Print the pd5 summary again (same call as the previous cell).
pd5.info()

In [None]:
# Convert the pandas-on-Spark frame back to a Spark DataFrame for writing.
fd5 = pd5.to_spark()

In [None]:
# Persist fd5 as Parquet in the "falk2210" bucket through the S3A connector
# configured on the Spark session.
# The scheme separator must be "://". The previous "s3a_//..." string has no
# URI scheme, so it would presumably be resolved as a relative path on the
# default filesystem instead of the object store.
fd5.write.parquet("s3a://falk2210/fd5_210921.parquet")

# 6 AWARD_CONTRACT 

<a id='d6' />

In [None]:
# Load every JSON file under falk/6.json/ into one Spark DataFrame (form 6).
d6 = spark.read.json("falk/6.json/*.json")
#d6.printSchema()

In [None]:
# Convert to a pandas-on-Spark DataFrame so the following cells can use the
# pandas-style rename/update/pop API.
pd6 = d6.to_pandas_on_spark()

In [None]:
# Replace the DOFFIN_ESENDERS_VERSION fragment with DOFFIN_VERSION in every
# column label. (Fixes `lambda x_ ...`, which was a SyntaxError.)
pd6 = pd6.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd6.info()

In [None]:
# Strip the DOFFIN_ESENDERS_ fragment from every column label.
# (Fixes `lambda x_ ...`, which was a SyntaxError.)
pd6 = pd6.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

In [None]:
# Strip the FORM_SECTION_ fragment from every column label.
# (Fixes `lambda x_ ...`, which was a SyntaxError.)
pd6 = pd6.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd6.info()

In [None]:
# Strip the CONTRACT_AWARD_UTILITIES_ fragment from every column label.
# (Fixes `lambda x_ ...`, which was a SyntaxError.)
pd6 = pd6.rename(columns=lambda x: re.sub('CONTRACT_AWARD_UTILITIES_', '', x))
#pd6.info()

In [None]:
# Fold each "<column>_P" variant into its base column: pop() removes the _P
# column from pd6 and update() writes its values into the base column in place.
for base_col in (
    'FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE_TITLE_CONTRACT',
    'FD_AWARD_CONTRACT_MANDATORY_INFORMATION_NOT_INTENDED_PUBLICATION_TITLE_CONTRACT',
    'FD_COMPLEMENTARY_INFORMATION_ADDITIONAL_INFORMATION',
    'FD_OBJECT_DESCRIPTION_LOCATION_NUTS_LOCATION',
    'FD_OBJECT_DESCRIPTION_SHORT_DESCRIPTION',
    'FD_OBJECT_DESCRIPTION_TITLE_CONTRACT',
    'FD_PROCEDURES_ADMINISTRATIVE_INFO_REFERENCE_NUMBER_ATTRIBUTED',
):
    pd6[base_col].update(pd6.pop(base_col + '_P'))

In [None]:
# Normalise pd6 column labels by stripping or replacing name fragments.
# Each (pattern, replacement) pair is applied with re.sub to every column
# label, one rename pass per pair. ORDER MATTERS: later patterns match names
# produced by earlier replacements.
# Fixes: `lambda x_ ...` was a SyntaxError, and '\_P$' used an invalid string
# escape (now the equivalent raw pattern r'_P$').
pd6_fragment_renames = (
    (r'_P$', ''),
    ('F06_', ''),
    ('_F06', ''),
    ('CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', ''),
    ('DOFFIN_FORM_TYPE_', ''),
    ('ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_', ''),
    ('ORGANISATION_', ''),
    ('FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE'),
    ('PREVIOUS_PUBLICATION_NOTICE_F6', 'PREVIOUS_PUBLICATION_NOTICE'),
    ('PREVIOUS_NOTICE_BUYER_PROFILE_F6', 'PREVIOUS_NOTICE_BUYER_PROFILE'),
    ('ENTITY_', ''),
    ('FD_AWARD_CONTRACT_MANDATORY_INFORMATION_NOT_INTENDED_PUBLICATION_', ''),
    ('PREVIOUS_PUBLICATION_EXISTS_F6_', 'PREVIOUS_PUBLICATION_EXISTS_'),
    ('E_MAILS_', ''),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_PREVIOUS_PUBLICATION_EXISTS_', 'FD_PREVIOUS_PUBLICATION_EXISTS_'),
)
for old_fragment, new_fragment in pd6_fragment_renames:
    # Default arguments pin the current pair to this lambda.
    pd6 = pd6.rename(columns=lambda x, old=old_fragment, new=new_fragment: re.sub(old, new, x))

In [None]:
# Move the APPEAL_PROCEDURES_* column prefixes to the
# PROCEDURES_FOR_APPEAL_* naming. Each pair is applied with re.sub to every
# column label. (Fixes `lambda x_ ...`, which was a SyntaxError.)
appeal_prefix_renames = (
    ('FD_COMPLEMENTARY_INFORMATION_APPEAL_PROCEDURES_RESPONSIBLE_FOR_APPEAL_PROCEDURES_', 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_'),
    ('FD_COMPLEMENTARY_INFORMATION_APPEAL_PROCEDURES_SERVICE_FROM_INFORMATION_', 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_INFORMATION_FOR_SERVICE_'),
)
for old_fragment, new_fragment in appeal_prefix_renames:
    # Default arguments pin the current pair to this lambda.
    pd6 = pd6.rename(columns=lambda x, old=old_fragment, new=new_fragment: re.sub(old, new, x))

In [None]:
# Fold the LODGING_INFORMATION_FOR_SERVICE_* address columns into the matching
# APPEAL_PROCEDURE_BODY_RESPONSIBLE_* columns: pop() removes the source column
# and update() writes its values into the target column in place.
# The FAX and URL fields are not merged here.
review_body_prefix = 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_'
lodging_prefix = 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_INFORMATION_FOR_SERVICE_'
for address_field in (
    'ADDRESS',
    'COUNTRY_VALUE',
    'E_MAIL',
    'NATIONALID',
    'OFFICIALNAME',
    'PHONE',
    'POSTAL_CODE',
    'TOWN',
):
    pd6[review_body_prefix + address_field].update(pd6.pop(lodging_prefix + address_field))

In [None]:
# Fold the un-prefixed award columns into their fully prefixed counterparts:
# pop() removes the source column and update() writes its values into the
# target column in place.
# NOTE(review): an earlier rename in this notebook strips
# 'CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_' from all column labels; if cells are
# run top to bottom the CHP columns referenced below may no longer exist under
# these names - verify against pd6.info().
# COUNTRY_ORIGIN_PRODUCT_NO_COMMUNITY_ORIGIN and NUMBER_CONTRACTS_AWARDED have
# no target column assigned yet and are not merged.
award_value_prefix = 'FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE_'
contractor_prefix = 'CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_CHP_'

award_merge_pairs = [
    (award_value_prefix + contractor_prefix + contractor_field, contractor_prefix + contractor_field)
    for contractor_field in (
        'ADDRESS',
        'COUNTRY_VALUE',
        'E_MAIL',
        'FAX',
        'NATIONALID',
        'OFFICIALNAME',
        'PHONE',
        'POSTAL_CODE',
        'TOWN',
        'URL',
    )
]
award_merge_pairs += [
    (award_value_prefix + 'CONTRACT_NO', 'CONTRACT_NO'),
    (award_value_prefix + 'ITEM', 'ITEM'),
    (award_value_prefix + 'LOT_NUMBER', 'LOT_NUMBER'),
    (award_value_prefix + 'OFFERS_RECEIVED_NUMBER', 'OFFERS_RECEIVED_NUMBER'),
    (award_value_prefix + 'OFFERS_RECEIVED_NUMBER_MEANING', 'OFFERS_RECEIVED_NUMBER_MEANING'),
    ('FD_PROCEDURES_AWARD_CRITERIA_CONTRACT_UTILITIES_INFORMATION_PRICE_AWARD_CRITERIA_PRICE', 'PRICE_AWARD_CRITERIA_PRICE'),
    ('FD_OBJECT_DESCRIPTION_TITLE_CONTRACT', 'TITLE_CONTRACT'),
]

for target_col, source_col in award_merge_pairs:
    pd6[target_col].update(pd6.pop(source_col))

In [None]:
# Map the long FD_* column names of the award form onto shorter names.
# Each (pattern, replacement) pair is applied with re.sub to every column
# label, one rename pass per pair. ORDER MATTERS: more specific prefixes come
# before the generic ones, and later patterns match names produced by earlier
# replacements.
# (Fixes `lambda x_ ...`, which was a SyntaxError on every line of the cell.)
#
# TODO: no source pattern has been identified yet for these target names:
#   CONTRACTING_BODY_ADDRESS_FURTHER_INFO_
#   CONTRACTING_BODY_              (e.g. CONTRACTING_BODY_CA_ACTIVITY_VALUE)
#   LEFTI_                         (e.g. LEFTI_PARTICULAR_PROFESSION_CTYPE)
#   OBJECT_CONTRACT_OBJECT_DESCR_  (e.g. OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL)
#   PROCEDURE_                     (e.g. PROCEDURE_DATE_RECEIPT_TENDERS)
pd6_short_name_renames = (
    ('FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE_CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_CHP_', 'AWARDED_CONTRACT_CONTRACTOR_ADDRESS_CONTRACTOR_'),
    ('FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE_', 'AWARD_CONTRACT_'),  # e.g. ITEM
    ('FD_COMPLEMENTARY_INFORMATION_APPEAL_PROCEDURES_RESPONSIBLE_FOR_MEDIATION_PROCEDURES_', 'COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_', 'COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE_', 'COMPLEMENTARY_INFO_'),  # cf. COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_'),
    ('FD_CONTRACTING_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'CONTRACTING_BODY_ADDRESS_PARTICIPATION_'),
    ('FD_OBJECT_DESCRIPTION_CPV_', 'OBJECT_CONTRACT_'),  # e.g. OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE
    # This prefix was already rewritten by the second pair above, so this
    # entry cannot match anything any more; kept to preserve the original steps.
    ('FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE_', 'OBJECT_CONTRACT_OBJECT_DESCR_'),  # e.g. OBJECT_CONTRACT_OBJECT_DESCR_ITEM
    ('FD_OBJECT_DESCRIPTION_', 'OBJECT_CONTRACT_'),  # e.g. OBJECT_CONTRACT_TITLE
    ('OBJECT_CONTRACT_TITLE_CONTRACT', 'OBJECT_CONTRACT_OBJECT_DESCR_TITLE'),
    ('OBJECT_CONTRACT_SHORT_DESCRIPTION', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR'),
    ('AWARD_CONTRACT_CONTRACT_LIKELY_SUB_CONTRACTED_EXCLUDING_VAT_VALUE_CURRENCY', 'AWARDED_CONTRACT_VAL_SUBCONTRACTING_CURRENCY'),
    ('AWARD_CONTRACT_CONTRACT_LIKELY_SUB_CONTRACTED_EXCLUDING_VAT_VALUE_text', 'AWARDED_CONTRACT_VAL_SUBCONTRACTING_text'),

    ('FD_OBJECT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'OBJECT_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('FD_OBJECT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_HIGH'),
    ('FD_OBJECT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_LOW'),
    ('FD_OBJECT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_VALUE_COST', 'OBJECT_CONTRACT_VAL_TOTAL_text'),

    ('AWARD_CONTRACT_INFORMATION_VALUE_CONTRACT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_INFORMATION_VALUE_CONTRACT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH'),
    ('AWARD_CONTRACT_INFORMATION_VALUE_CONTRACT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW'),
    ('AWARD_CONTRACT_INFORMATION_VALUE_CONTRACT_INITIAL_ESTIMATED_TOTAL_VALUE_CONTRACT_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_INFORMATION_VALUE_CONTRACT_INITIAL_ESTIMATED_TOTAL_VALUE_CONTRACT_VALUE_COST', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text'),
    ('FD_PROCEDURES_TYPE_PROCEDURE_AWARD_AWARD_WITHOUT_PRIOR_PUBLICATION_JUSTI_CHOICE_COMPETITION_REASON_CONTRACT_LAWFUL', 'PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_PUBLICATION_D_JUSTIFICATION'),
)
for old_fragment, new_fragment in pd6_short_name_renames:
    # Default arguments pin the current pair to this lambda.
    pd6 = pd6.rename(columns=lambda x, old=old_fragment, new=new_fragment: re.sub(old, new, x))

In [None]:
#pd6.info()

In [None]:
# Second pass of column-label rewrites for the award form.
# Each (pattern, replacement) pair is applied with re.sub to every column
# label, one rename pass per pair, in the order listed (later patterns can
# match names produced by earlier replacements).
# (Fixes `lambda x_ ...`, which was a SyntaxError on every line of the cell.)
#
# TODO: no source pattern has been identified yet for these target names:
#   CONTRACTING_BODY_URL_DOCUMENT
#   CONTRACTING_BODY_URL_TOOL
#   PROCEDURE_URL_NATIONAL_PROCEDURE
pd6_second_pass_renames = (
    ('FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE_', 'FD_AWARD_OF_CONTRACT_'),
    ('CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_CHP_', 'ECONOMIC_OPERATOR_NAME_ADDRESS_'),
    ('FD_AWARD_OF_CONTRACT_CONTRACT_LIKELY_SUB_CONTRACTED_', 'FD_AWARD_OF_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_'),
    ('DATE_OF_CONTRACT_AWARD_', 'CONTRACT_AWARD_DATE_'),
    ('FD_AWARD_OF_CONTRACT_INFORMATION_VALUE_CONTRACT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_', 'FD_AWARD_OF_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_'),
    ('CONTRACT_NO', 'CONTRACT_NUMBER'),
    ('FD_COMPLEMENTARY_INFORMATION_APPEAL_PROCEDURES_RESPONSIBLE_FOR_MEDIATION_PROCEDURES_', 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_MEDIATION_PROCEDURE_BODY_RESPONSIBLE_'),
    # Pattern and replacement are identical here, so this pair is a no-op.
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_'),
    ('FD_CONTRACTING_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_'),
    ('FD_OBJECT_DESCRIPTION_', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_'),
    ('FD_OBJECT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_'),
    ('FD_AWARD_OF_CONTRACT_TITLE_CONTRACT', 'FD_AWARD_OF_CONTRACT_CONTRACT_TITLE'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_PARTICIPATE', 'CONTRACTING_BODY_URL_PARTICIPATION'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_GENERAL', 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_URL_GENERAL'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_', 'CONTRACTING_BODY_'),
    ('AWARD_CONTRACT_TITLE_CONTRACT', 'AWARD_CONTRACT_TITLE'),
    ('AWARD_CONTRACT_LOT_NUMBER', 'AWARD_CONTRACT_LOT_NO'),
    ('FD_COMPLEMENTARY_INFORMATION_ADDITIONAL_INFORMATION', 'COMPLEMENTARY_INFO_INFO_ADD'),
    ('FD_PROCEDURES_ADMINISTRATIVE_INFO_REFERENCE_NUMBER_ATTRIBUTED', 'OBJECT_CONTRACT_REFERENCE_NUMBER'),
    # This expansion is reversed again two pairs further down.
    ('FD_CONTRACTING_ACTIVITIES_OF_CONTRACTING_ENTITY_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_ACTIVITIES_OF_CONTRACTING_ENTITY_'),
    ('AWARD_CONTRACT_INFORMATION_VALUE_CONTRACT_', 'AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_ACTIVITIES_OF_CONTRACTING_ENTITY_', 'FD_CONTRACTING_ACTIVITIES_OF_CONTRACTING_ENTITY_'),
    ('OBJECT_CONTRACT_TYPE_CONTRACT_LOCATION_W_PUB_', 'OBJECT_CONTRACT_TYPE_CONTRACT_LOCATION_'),
    ('SERVICE_CATEGORY_PUB', 'SERVICE_CATEGORY'),
    # Same pair as three entries above; kept to preserve the original steps.
    ('AWARD_CONTRACT_INFORMATION_VALUE_CONTRACT_', 'AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_'),

    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_NUMBER_MONTHS', 'AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_MORE_INFORMATION_IF_ANNUAL_MONTHLY_NUMBER_OF_MONTHS'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_NUMBER_YEARS', 'AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_MORE_INFORMATION_IF_ANNUAL_MONTHLY_NUMBER_OF_YEARS'),
)
for old_fragment, new_fragment in pd6_second_pass_renames:
    # Default arguments pin the current pair to this lambda.
    pd6 = pd6.rename(columns=lambda x, old=old_fragment, new=new_fragment: re.sub(old, new, x))

In [None]:
#pd3.info()

In [None]:
# Print a summary of pd6 to check the column names after the rename passes.
pd6.info()

In [None]:
# Convert the pandas-on-Spark frame back to a Spark DataFrame for writing.
fd6 = pd6.to_spark()

In [None]:
"""
fd6 = fd6.withColumn("FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD", expr("make_date(`FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD.YEAR`, `FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD.MONTH`, `FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD.DAY`)"))
fd6 = fd6.withColumn("FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY`)"))
fd6 = fd6.withColumn("FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ", expr("make_date(`FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ.YEAR`, `FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ.MONTH`, `FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ.DAY`)"))
fd6 = fd6.withColumn("FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ", expr("make_date(`FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.YEAR`, `FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.MONTH`, `FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.DAY`)"))
fd6 = fd6.withColumn("FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ", expr("make_date(`FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.YEAR`, `FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.MONTH`, `FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.DAY`)"))
fd6 = fd6.withColumn("FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ", expr("make_date(`FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ.YEAR`, `FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ.MONTH`, `FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ.DAY`)"))
"""

In [None]:
#fd6.printSchema()

In [None]:
"""

pd6 = fd6.to_pandas_on_spark()

pd6.info()
"""

In [None]:
"""
pd6 = pd6.drop(columns=['FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD.DAY', 'FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD.MONTH', 'FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD.YEAR'])
pd6 = pd6.drop(columns=['FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR'])
pd6 = pd6.drop(columns=['FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ.DAY', 'FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ.MONTH', 'FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ.YEAR'])
pd6 = pd6.drop(columns=['FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.DAY', 'FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.MONTH', 'FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.YEAR'])
pd6 = pd6.drop(columns=['FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.DAY', 'FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.MONTH', 'FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.YEAR'])
pd6 = pd6.drop(columns=['FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ.DAY', 'FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ.MONTH', 'FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ.YEAR'])
"""

In [None]:
#pd6.info()

In [None]:
"""
pd6["FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD"]= ps.to_datetime(pd6["FD_AWARD_CONTRACT_AWARD_AND_CONTRACT_VALUE.DATE_OF_CONTRACT_AWARD"])
pd6["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd6["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"])
pd6["FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ"]= ps.to_datetime(pd6["FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.CNT_NOTICE_INFORMATION.DATE_OJ"])
pd6["FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ"]= ps.to_datetime(pd6["FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.EX_ANTE_NOTICE_INFORMATION.DATE_OJ"])
pd6["FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ"]= ps.to_datetime(pd6["FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ"])
pd6["FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ"]= ps.to_datetime(pd6["FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6.PREVIOUS_PUBLICATION_EXISTS_F6.PREVIOUS_PUBLICATION_NOTICE_F6.DATE_OJ"])
"""

In [None]:
#pd6.info()

In [None]:
#pd6['DOFFIN_APPENDIX.DOFFIN_FORM_TYPE.NATIONAL.REFERENCE_SECTION.REFERENCE_DOFFIN.TYPE'].unique()

In [None]:
#fd6 = pd6.to_spark()

In [None]:
# Append fd6 to the f_award.json dataset (the fd13 cell further down appends to the same path).
fd6.write.json("falk2210/f_award.json", mode="append")

In [None]:
#fd6.write.parquet("s3a_//falk2210/fd6_210921.parquet")

In [None]:
#award = spark.read.json("falk2210/f_award.json")

In [None]:
#award.printSchema()

# 7 QUALIFICATION (dynamic purchasing)

<a id='d7' />

In [None]:
d7 = spark.read.json("falk/7.json/*.json")
#d7.printSchema()

In [None]:
pd7 = d7.to_pandas_on_spark()

In [None]:
pd7 = pd7.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd7.info()

In [None]:
pd7 = pd7.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_', '', x))

pd7 = pd7.rename(columns=lambda x_ re.sub('FORM_SECTION_', '', x))
#pd7.info()

In [None]:
pd7 = pd7.rename(columns=lambda x_ re.sub('QUALIFICATION_SYSTEM_UTILITIES_', '', x))
#pd7.info()

In [None]:
pd7 = pd7.rename(columns=lambda x_ re.sub('\_P$', '', x))
#pd7.info()

In [None]:
fd7 = pd7.to_spark()

In [None]:
fd7 = fd7.withColumn("FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE.DAY`)"))
fd7 = fd7.withColumn("FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM", expr("make_date(`FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM.YEAR`, `FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM.MONTH`, `FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM.DAY`)"))
fd7 = fd7.withColumn("FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL", expr("make_date(`FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL.YEAR`, `FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL.MONTH`, `FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL.DAY`)"))

In [None]:
fd7.printSchema()

In [None]:
pd7 = fd7.to_pandas_on_spark()

pd7.info()

In [None]:
pd7 = pd7.drop(columns=['FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE.YEAR'])
pd7 = pd7.drop(columns=['FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM.DAY', 'FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM.MONTH', 'FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM.YEAR'])
pd7 = pd7.drop(columns=['FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL.DAY', 'FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL.MONTH', 'FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL.YEAR'])

In [None]:
pd7.info()

In [None]:
pd7["FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd7["FD_COMPLEMENTARY_INFORMATION_QUALIFICATION_SYSTEM.NOTICE_DISPATCH_DATE"])
pd7["FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM"]= ps.to_datetime(pd7["FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_FROM"])
pd7["FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL"]= ps.to_datetime(pd7["FD_PROCEDURES_QUALIFICATION_SYSTEM.ADMINISTRATIVE_INFORMATION_QUALIFICATION_SYSTEM.DURATION_QUALIFICATION_SYSTEM.DURATION_UNTIL"])

In [None]:
pd7.info()

In [None]:
pd7['DOFFIN_APPENDIX.DOFFIN_FORM_TYPE.NATIONAL.REFERENCE_SECTION.REFERENCE_DOFFIN.TYPE'].unique()

In [None]:
# Convert back to a Spark DataFrame and persist it as Parquet.
fd7 = pd7.to_spark()

# "s3a://" is the S3A URI scheme configured on the Spark session; the
# previous "s3a_//" spelling is not a URI scheme.
fd7.write.parquet("s3a://falk2210/fd7_210921.parquet")

# 8 BUYER_PROFILE 

<a id='d8' />

In [None]:
d8 = spark.read.json("falk/8.json/*.json")
d8.printSchema()

In [None]:
pd8 = d8.to_pandas_on_spark()

In [None]:
pd8 = pd8.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
pd8.info()

In [None]:
pd8 = pd8.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_', '', x))

pd8 = pd8.rename(columns=lambda x_ re.sub('FORM_SECTION_', '', x))
pd8.info()

In [None]:
pd8 = pd8.rename(columns=lambda x_ re.sub('BUYER_PROFILE_', '', x))
pd8.info()

In [None]:
#pd8 = pd8.rename(columns=lambda x_ re.sub('_', '_', x))
#pd8.info()

In [None]:
pd8 = pd8.rename(columns=lambda x_ re.sub('\_P$', '', x))
pd8.info()

In [None]:
fd8 = pd8.to_spark()

In [None]:
fd8 = fd8.withColumn("FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE.DAY`)"))

In [None]:
fd8.printSchema()

In [None]:
pd8 = fd8.to_pandas_on_spark()

pd8.info()

In [None]:
pd8 = pd8.drop(columns=['FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE.YEAR'])

In [None]:
pd8.info()

In [None]:
pd8["FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd8["FD_COMPLEMENTARY_INFORMATION_NOTICE_NOTICE_DISPATCH_DATE"])

In [None]:
pd8.info() 

In [None]:
# Convert back to a Spark DataFrame and persist it as Parquet.
fd8 = pd8.to_spark()

# "s3a://" is the S3A URI scheme configured on the Spark session; the
# previous "s3a_//" spelling is not a URI scheme.
fd8.write.parquet("s3a://falk2210/fd8_210921.parquet")

# 9 CONTRACT (SIMPLIFIED_CONTRACT)

<a id='d9' />

In [None]:
d9 = spark.read.json("falk/9.json/*.json")
#d9.printSchema()

In [None]:
pd9 = d9.to_pandas_on_spark()

In [None]:
pd9 = pd9.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd9.info()

In [None]:
pd9 = pd9.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_', '', x))

pd9 = pd9.rename(columns=lambda x_ re.sub('FORM_SECTION_', '', x))
#pd9.info()

In [None]:
pd9 = pd9.rename(columns=lambda x_ re.sub('SIMPLIFIED_CONTRACT_', '', x))
#pd9.info()

In [None]:
#pd9 = pd9.rename(columns=lambda x_ re.sub('_', '_', x))
#pd9.info()

In [None]:
pd9 = pd9.rename(columns=lambda x_ re.sub('\_P$', '', x))
pd9.info()

In [None]:
fd9 = pd9.to_spark()

In [None]:
fd9 = fd9.withColumn("FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY`)"))
fd9 = fd9.withColumn("FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ", expr("make_date(`FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ.YEAR`, `FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ.MONTH`, `FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ.DAY`)"))
fd9 = fd9.withColumn("FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP", expr("make_date(`FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.YEAR`, `FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.MONTH`, `FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.DAY`)"))

In [None]:
fd9.printSchema()

In [None]:
pd9 = fd9.to_pandas_on_spark()

In [None]:
pd9.info()

In [None]:
pd9 = pd9.drop(columns=['FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR'])
pd9 = pd9.drop(columns=['FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ.DAY', 'FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ.MONTH', 'FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ.YEAR'])
pd9 = pd9.drop(columns=['FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.DAY', 'FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.MONTH', 'FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.YEAR'])

In [None]:
pd9.info()

In [None]:
pd9["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd9["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"])
pd9["FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ"]= ps.to_datetime(pd9["FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_OJ.DATE_OJ"])
pd9["FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP"]= ps.to_datetime(pd9["FD_PROCEDURES_SIMPLIFIED_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP"])

In [None]:
pd9.info() 

In [None]:
pd9['FORM'].unique()

In [None]:
# Convert back to a Spark DataFrame and persist it as Parquet.
fd9 = pd9.to_spark()

# "s3a://" is the S3A URI scheme configured on the Spark session; the
# previous "s3a_//" spelling is not a URI scheme.
fd9.write.parquet("s3a://falk2210/fd9_210921.parquet")

# 10 CONTRACT (CONCESSION)

<a id='d10' />

In [None]:
d10 = spark.read.json("falk/10.json/*.json")
#d10.printSchema()

In [None]:
pd10 = d10.to_pandas_on_spark()

In [None]:
pd10 = pd10.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd10.info()

In [None]:
pd10 = pd10.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_', '', x))

pd10 = pd10.rename(columns=lambda x_ re.sub('FORM_SECTION_', '', x))
#pd10.info()

In [None]:
pd10 = pd10.rename(columns=lambda x_ re.sub('CONCESSION_', '', x))
#pd10.info()

In [None]:
pd10 = pd10.rename(columns=lambda x_ re.sub('\_P$', '', x))
pd10.info()

In [None]:
fd10 = pd10.to_spark()

In [None]:
fd10 = fd10.withColumn("FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY`)"))
fd10 = fd10.withColumn("FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP", expr("make_date(`FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.YEAR`, `FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.MONTH`, `FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.DAY`)"))

In [None]:
fd10.printSchema()

In [None]:
pd10 = fd10.to_pandas_on_spark()

pd10.info()

In [None]:
pd10 = pd10.drop(columns=['FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR'])
pd10 = pd10.drop(columns=['FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.DAY', 'FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.MONTH', 'FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP.YEAR'])

In [None]:
pd10.info()

In [None]:
pd10["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd10["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"])
pd10["FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP"]= ps.to_datetime(pd10["FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_TIME_LIMIT_CHP"])

In [None]:
pd10.info() 

In [None]:
pd10.head()

In [None]:
pd10['DOFFIN_APPENDIX.DOFFIN_FORM_TYPE.NATIONAL.REFERENCE_SECTION.REFERENCE_DOFFIN.TYPE'].unique()

In [None]:
# Convert back to a Spark DataFrame and persist it as Parquet.
fd10 = pd10.to_spark()

# "s3a://" is the S3A URI scheme configured on the Spark session; the
# previous "s3a_//" spelling is not a URI scheme.
fd10.write.parquet("s3a://falk2210/fd10_210921.parquet")

# 11 CONTRACT (CONTRACT_CONCESSIONAIRE)

<a id='d11' />

In [None]:
d11 = spark.read.json("falk/11.json/*.json")
#d11.printSchema()

In [None]:
pd11 = d11.to_pandas_on_spark()

In [None]:
pd11 = pd11.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd11.info()

In [None]:
pd11 = pd11.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_', '', x))

pd11 = pd11.rename(columns=lambda x_ re.sub('FORM_SECTION_', '', x))
#pd11.info()

In [None]:
pd11 = pd11.rename(columns=lambda x_ re.sub('CONTRACT_CONCESSIONAIRE_', '', x))
#pd11.info()

In [None]:
pd11 = pd11.rename(columns=lambda x_ re.sub('\_P$', '', x))
pd11.info()

In [None]:
fd11 = pd11.to_spark()

In [None]:
fd11 = fd11.withColumn("FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY`)"))
fd11 = fd11.withColumn("FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE", expr("make_date(`FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.YEAR`, `FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.MONTH`, `FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.DAY`)"))
fd11 = fd11.withColumn("FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE", expr("make_date(`FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.YEAR`, `FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.MONTH`, `FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.DAY`)"))
fd11 = fd11.withColumn("FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE", expr("make_date(`FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE.YEAR`, `FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE.MONTH`, `FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE.DAY`)"))
fd11 = fd11.withColumn("FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT", expr("make_date(`FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT.YEAR`, `FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT.MONTH`, `FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT.DAY`)"))

In [None]:
fd11.printSchema()

In [None]:
pd11 = fd11.to_pandas_on_spark()

pd11.info()

In [None]:
pd11 = pd11.drop(columns=['FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR'])
pd11 = pd11.drop(columns=['FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.DAY', 'FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.MONTH', 'FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.YEAR'])
pd11 = pd11.drop(columns=['FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.DAY', 'FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.MONTH', 'FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.YEAR'])
pd11 = pd11.drop(columns=['FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE.DAY', 'FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE.MONTH', 'FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE.YEAR'])
pd11 = pd11.drop(columns=['FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT.DAY', 'FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT.MONTH', 'FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT.YEAR'])

In [None]:
pd11.info()

In [None]:
pd11["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd11["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"])
pd11["FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE"]= ps.to_datetime(pd11["FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE"])
pd11["FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE"]= ps.to_datetime(pd11["FD_OBJECT_CONTRACT_NOTICE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE"])
pd11["FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE"]= ps.to_datetime(pd11["FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_DISPATCH_INVITATIONS_DATE"])
pd11["FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT"]= ps.to_datetime(pd11["FD_PROCEDURES_CONTRACT_NOTICE.ADMINISTRATIVE_INFORMATION_F11_TIME_LIMIT_TYPE.TIME_LIMIT"])

In [None]:
pd11.info() 

In [None]:
# Convert back to a Spark DataFrame and persist it as Parquet.
fd11 = pd11.to_spark()

# "s3a://" is the S3A URI scheme configured on the Spark session; the
# previous "s3a_//" spelling is not a URI scheme.
fd11.write.parquet("s3a://falk2210/fd11_210921.parquet")

# 12 CONTRACT (DESIGN_CONTEST)

<a id='d12' />

In [None]:
d12 = spark.read.json("falk/12.json/*.json")
#d12.printSchema()

In [None]:
pd12 = d12.to_pandas_on_spark()

In [None]:
pd12 = pd12.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd12.info()

In [None]:
pd12 = pd12.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_', '', x))

pd12 = pd12.rename(columns=lambda x_ re.sub('FORM_SECTION_', '', x))
#pd12.info()

In [None]:
pd12 = pd12.rename(columns=lambda x_ re.sub('DESIGN_CONTEST_', '', x))
#pd12.info()

In [None]:
#pd12 = pd12.rename(columns=lambda x_ re.sub('_', '_', x))
#pd12.info()

In [None]:
pd12 = pd12.rename(columns=lambda x_ re.sub('\_P$', '', x))
pd12.info()

In [None]:
fd12 = pd12.to_spark()

In [None]:
fd12 = fd12.withColumn("FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY`)"))
fd12 = fd12.withColumn("FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE", expr("make_date(`FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE.YEAR`, `FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE.MONTH`, `FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE.DAY`)"))
fd12 = fd12.withColumn("FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP", expr("make_date(`FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP.YEAR`, `FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP.MONTH`, `FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP.DAY`)"))

In [None]:
fd12.printSchema()

In [None]:
pd12 = fd12.to_pandas_on_spark()

pd12.info()

In [None]:
pd12 = pd12.drop(columns=['FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR'])
pd12 = pd12.drop(columns=['FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE.DAY', 'FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE.MONTH', 'FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE.YEAR'])
pd12 = pd12.drop(columns=['FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP.DAY', 'FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP.MONTH', 'FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP.YEAR'])

In [None]:
pd12.info()

In [None]:
pd12["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd12["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"])
pd12["FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE"]= ps.to_datetime(pd12["FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.DISPATCH_INVITATIONS_DATE"])
pd12["FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP"]= ps.to_datetime(pd12["FD_PROCEDURES_ADMINISTRATIVE_INFORMATION_DESIGN_CONTEST_NOTICE.TIME_LIMIT_CHP"])

In [None]:
pd12.info() 

In [None]:
pd12['DOFFIN_APPENDIX.DOFFIN_FORM_TYPE.NATIONAL.REFERENCE_SECTION.REFERENCE_DOFFIN.TYPE'].unique()

In [None]:
# Convert back to a Spark DataFrame and persist it as Parquet.
fd12 = pd12.to_spark()

# "s3a://" is the S3A URI scheme configured on the Spark session; the
# previous "s3a_//" spelling is not a URI scheme.
fd12.write.parquet("s3a://falk2210/fd12_210921.parquet")

In [None]:
#spark.stop()

# 13 AWARDS (RESULT_DESIGN_CONTEST)

<a id='d13' />

In [None]:
d13 = spark.read.json("falk/13.json/*.json")
#d13.printSchema()

In [None]:
pd13 = d13.to_pandas_on_spark()

In [None]:
pd13 = pd13.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd13.info()

In [None]:
pd13 = pd13.rename(columns=lambda x_ re.sub('DOFFIN_ESENDERS_', '', x))

pd13 = pd13.rename(columns=lambda x_ re.sub('FORM_SECTION_', '', x))
#pd13.info()

In [None]:
pd13 = pd13.rename(columns=lambda x_ re.sub('RESULT_DESIGN_CONTEST_', '', x))
#pd13.info()

In [None]:
# Ordered (pattern, replacement) regex rules applied to every column name.
# Order matters: later rules operate on the output of earlier ones.
# Commented-out entries are rules that were already disabled.
rename_rules_13a = [
    (r'_P$', ''),  # raw string: "\_" is not a valid str escape
    ('F13_', ''),
    ('_F13', ''),
    ('CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', ''),
    ('DOFFIN_FORM_TYPE_', ''),
    # ('ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_', ''),
    ('ORGANISATION_', ''),
    # ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F3_PREVIOUS_PUBLICATION_EXISTS_F3', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_PREVIOUS_PUBLICATION_EXISTS'),
    # ('PREVIOUS_PUBLICATION_NOTICE_F3', 'PREVIOUS_PUBLICATION_NOTICE'),
    # ('PREVIOUS_NOTICE_BUYER_PROFILE_F3', 'PREVIOUS_NOTICE_BUYER_PROFILE'),
    ('_ENTITY', ''),
    ('E_MAILS_', ''),
    ('FD_RESULTS_CONTEST_RESULT_CONTEST_AWARD_PRIZES_NAME_ADDRESS_WINNER_', 'FD_AWARD_OF_CONTRACT_ECONOMIC_OPERATOR_NAME_ADDRESS_'),
    ('FD_RESULTS_CONTEST_RESULT_CONTEST_', 'FD_AWARD_OF_CONTRACT_'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE', 'FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_ADDRESS'),
    ('FD_OBJECT_CPV_', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_'),
    ('FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ_', 'FD_PREVIOUS_PUBLICATION_EXISTS_'),
]
# One rename per rule, in order. The defaults bind the current rule to the
# lambda so it never depends on the loop variables.
for pattern, replacement in rename_rules_13a:
    pd13 = pd13.rename(columns=lambda x, p=pattern, r=replacement: re.sub(p, r, x))

In [None]:
# Ordered (pattern, replacement) regex rules applied to every column name.
# Order matters: later rules operate on the output of earlier ones.
# Commented-out entries are placeholders/rules that were already disabled;
# the trailing comments are carried over from the original cell.
rename_rules_13b = [
    ('FD_AWARD_OF_CONTRACT_ECONOMIC_OPERATOR_NAME_ADDRESS_', 'AWARDED_CONTRACT_CONTRACTOR_ADDRESS_CONTRACTOR_'),
    ('FD_AWARD_OF_CONTRACT_', 'AWARD_CONTRACT_'),  # ITEM
    # ('', 'COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_APPEAL_PROCEDURES_RESPONSIBLE_FOR_APPEAL_PROCEDURES_', 'COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE_', 'COMPLEMENTARY_INFO_'),  # COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_ADDRESS_', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_'),
    # ('', 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_OR_CONTRACTING_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'CONTRACTING_BODY_ADDRESS_PARTICIPATION_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_OR_CONTRACTING_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_TYPE_OF_CONTRACTING_AUTHORITY_', 'CONTRACTING_BODY_'),  # CONTRACTING_BODY_CA_ACTIVITY_VALUE
    # ('', 'LEFTI_'),  # LEFTI_PARTICULAR_PROFESSION_CTYPE
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_', 'OBJECT_CONTRACT_'),  # OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE
    # ('', 'OBJECT_CONTRACT_OBJECT_DESCR_'),  # OBJECT_CONTRACT_OBJECT_DESCR_CPV_ADDITIONAL
    # NOTE(review): this rule can never match -- the second rule above already
    # rewrote every 'FD_AWARD_OF_CONTRACT_' to 'AWARD_CONTRACT_'. Kept to
    # preserve the original sequence; confirm which target was intended.
    ('FD_AWARD_OF_CONTRACT_', 'OBJECT_CONTRACT_'),  # OBJECT_CONTRACT_TITLE
    # ('', 'PROCEDURE_'),  # PROCEDURE_DATE_RECEIPT_TENDERS
    # NOTE(review): an earlier cell strips a trailing '_P', so this pattern
    # only matches if '_P' is followed by more characters -- confirm.
    ('FD_OBJECT_TITLE_P', 'OBJECT_CONTRACT_OBJECT_DESCR_TITLE'),
    ('FD_OBJECT_DESCRIPTION', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR'),
    ('AWARD_CONTRACT_PRIZE_VALUE_CURRENCY', 'OBJECT_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_PRIZE_VALUE_text', 'OBJECT_CONTRACT_VAL_TOTAL_text'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_GENERAL', 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_URL_GENERAL'),
    ('AWARD_CONTRACT_CONTEST_TITLE', 'AWARD_CONTRACT_TITLE'),
    # ('', 'AWARD_CONTRACT_LOT_NO'),
    ('FD_COMPLEMENTARY_INFORMATION_ADDITIONAL_INFORMATION', 'COMPLEMENTARY_INFO_INFO_ADD'),
    ('FD_PROCEDURES_FILE_REFERENCE_NUMBER', 'OBJECT_CONTRACT_REFERENCE_NUMBER'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_OR_CONTRACTING_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_'),
]
# One rename per rule, in order. The defaults bind the current rule to the
# lambda so it never depends on the loop variables.
for pattern, replacement in rename_rules_13b:
    pd13 = pd13.rename(columns=lambda x, p=pattern, r=replacement: re.sub(p, r, x))

In [None]:
# Print the column summary to inspect the renamed columns.
pd13.info()

In [None]:
"""
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_OBJECT_TITLE_P', 'FD_AWARD_OF_CONTRACT', x))
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_AWARD_OF_CONTRACT_ECONOMIC_OPERATOR_NAME_ADDRESS_', 'ECONOMIC_OPERATOR_NAME_ADDRESS_', x))
#pd13 = pd13.rename(columns=lambda x_ re.sub('FD_AWARD_OF_CONTRACT_CONTRACT_LIKELY_SUB_CONTRACTED_', 'FD_AWARD_OF_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_', x))
#pd13 = pd13.rename(columns=lambda x_ re.sub('DATE_OF_CONTRACT_AWARD_', 'CONTRACT_AWARD_DATE_', x))
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_AWARD_OF_CONTRACT_PRIZE_VALUE_', 'FD_AWARD_OF_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_', x))
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_AWARD_OF_CONTRACT_CONTEST_NUMBER', 'CONTRACT_NUMBER', x))
#pd13 = pd13.rename(columns=lambda x_ re.sub('FD_COMPLEMENTARY_INFORMATION_APPEAL_PROCEDURES_RESPONSIBLE_FOR_MEDIATION_PROCEDURES_', 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_MEDIATION_PROCEDURE_BODY_RESPONSIBLE_', x))
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_ADDRESS_', 'FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', x))
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_CONTRACTING_TYPE_AND_ACTIVITIES_OR_CONTRACTING_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', x))
#pd13 = pd13.rename(columns=lambda x_ re.sub('FD_OBJECT_DESCRIPTION_', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_', x))
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_OBJECT_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_', x))
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_AWARD_OF_CONTRACT_CONTEST_TITLE', 'FD_AWARD_OF_CONTRACT_CONTRACT_TITLE', x))
pd13 = pd13.rename(columns=lambda x_ re.sub('FD_COMPLEMENTARY_INFORMATION_APPEAL_PROCEDURES_RESPONSIBLE_FOR_APPEAL_PROCEDURES_', 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_', x))
"""

In [None]:
#pd13.info()

In [None]:
# Convert back to a Spark DataFrame; fd13 is appended to f_award.json further down.
fd13 = pd13.to_spark()

In [None]:
"""
fd13 = fd13.withColumn("FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY`)"))
fd13 = fd13.withColumn("FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ", expr("make_date(`FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ.YEAR`, `FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ.MONTH`, `FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ.DAY`)"))

"""

In [None]:
#fd13.printSchema()

In [None]:
"""
pd13 = fd13.to_pandas_on_spark()

pd13.info()
"""

In [None]:
"""
pd13 = pd13.drop(columns=['FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE.YEAR'])
pd13 = pd13.drop(columns=['FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ.DAY', 'FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ.MONTH', 'FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ.YEAR'])
"""

In [None]:
#pd13.info()

In [None]:
#pd13["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd13["FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE"])
#pd13["FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ"]= ps.to_datetime(pd13["FD_PROCEDURES_PREVIOUS_PUBLICATION_OJ.DATE_OJ"])

In [None]:
#pd13.info() 

In [None]:
#pd13['DOFFIN_APPENDIX.DOFFIN_FORM_TYPE.NATIONAL.REFERENCE_SECTION.REFERENCE_DOFFIN.TYPE'].unique()

In [None]:
#fd13 = pd13.to_spark()

In [None]:
#fd13.write.parquet("s3a_//falk2210/fd13_210921.parquet")

In [None]:
# Append the fd13 rows as JSON to the award output at this relative path;
# mode("append") adds to existing output instead of failing when it exists.
fd13.write.mode("append").json("falk2210/f_award.json")

# 14 ADDITIONAL_INFORMATION_CORRIGENDUM 

<a id='d14' />

In [None]:
# Load every *.json file under falk/14.json/ into one Spark DataFrame.
d14 = spark.read.json("falk/14.json/*.json")
#d14.printSchema()

In [None]:
# Switch to the pandas-on-Spark API so the following cells can rename and
# merge columns with pandas-style calls (rename / pop / update).
pd14 = d14.to_pandas_on_spark()

In [None]:
# Rewrite DOFFIN_ESENDERS_VERSION to DOFFIN_VERSION wherever it occurs in a
# column name (re.sub is not anchored).
# Fixed: the lambda was written `lambda x_ re.sub(...)`, which does not parse.
pd14 = pd14.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd14.info()

In [None]:
# Remove the DOFFIN_ESENDERS_ and FORM_SECTION_ tokens from every column name.
# Fixed: both lambdas read `lambda x_ re.sub(...)`, a SyntaxError; the
# parameter must be followed by a colon.
pd14 = pd14.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

pd14 = pd14.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd14.info()

In [None]:
# Remove the form-name token ADDITIONAL_INFORMATION_CORRIGENDUM_ from the
# column names.
# Fixed: `lambda x_ re.sub(...)` was a SyntaxError; it is now `lambda x: ...`.
pd14 = pd14.rename(columns=lambda x: re.sub('ADDITIONAL_INFORMATION_CORRIGENDUM_', '', x))
#pd14.info()

In [None]:
#pd14 = pd14.rename(columns=lambda x_ re.sub('_', '_', x))
#pd14.info()

In [None]:
# For each target column, pop its "<target>.P" sibling out of pd14 and merge it
# into the target with Series.update, so only the target column remains.
_corr_root = (
    'FD_COMPLEMENTARY_ICAR.NOTICE_INVOLVES_ICAR.CORRECTION_ADDITIONAL_INFO'
    '.INFORMATION_CORRECTED_ADDED.ORIGINAL_NOTICE_CORRESPONDING_TENDER'
    '.ORIGINAL_NOTICE.CORR'
)
_p_merge_targets = (
    _corr_root + '.ADD.NEW_VALUE.TEXT',
    _corr_root + '.DELETE.OLD_VALUE.TEXT',
    _corr_root + '.REPLACE.NEW_VALUE.TEXT',
    _corr_root + '.REPLACE.OLD_VALUE.TEXT',
    'FD_COMPLEMENTARY_ICAR.OTHER_ADDITIONAL_INFO',
    'FD_OBJECT_ICAR.DESCRIPTION_ICAR.SHORT_DESCRIPTION_CONTRACT',
    'FD_OBJECT_ICAR.DESCRIPTION_ICAR.TITLE_CONTRACT',
    'FD_PROCEDURES_ICAR.ADMINISTRATIVE_INFORMATION.FILE_REFERENCE_NUMBER',
)
for _target in _p_merge_targets:
    # Kept as a single expression so the target is looked up before the pop.
    pd14[_target].update(pd14.pop(_target + '.P'))

In [None]:
#pd14.info()

In [None]:
# Drop a trailing "_P" from column names.
# Fixed: (1) `lambda x_ re.sub(...)` was a SyntaxError; (2) the pattern is now
# a raw string, because '\_' is an invalid string escape. r'_P$' matches
# exactly what '\_P$' matched.
# NOTE(review): the columns merged earlier in this section end in ".P" (dot),
# so this pattern may have been meant as r'\.P$' -- confirm with pd14.info().
pd14 = pd14.rename(columns=lambda x: re.sub(r'_P$', '', x))
#pd14.info()

In [None]:
# Back to a Spark DataFrame: the next cell builds date columns with Spark SQL
# expressions via withColumn/expr.
fd14 = pd14.to_spark()

In [None]:
# Collapse every <base>.YEAR / <base>.MONTH / <base>.DAY column triple of fd14
# into one column named <base>, built with the Spark SQL make_date() function.
# Column names contain dots, hence the backticks inside the SQL expression.
_corr_root = (
    "FD_COMPLEMENTARY_ICAR.NOTICE_INVOLVES_ICAR.CORRECTION_ADDITIONAL_INFO"
    ".INFORMATION_CORRECTED_ADDED.ORIGINAL_NOTICE_CORRESPONDING_TENDER"
    ".ORIGINAL_NOTICE.CORR"
)
_fd14_date_columns = (
    "FD_COMPLEMENTARY_ICAR.NOTICE_DISPATCH_DATE",
    _corr_root + ".ADD.NEW_VALUE.DATE_TIME",
    _corr_root + ".DELETE.OLD_VALUE.DATE_TIME",
    _corr_root + ".REPLACE.NEW_VALUE.DATE_TIME",
    _corr_root + ".REPLACE.OLD_VALUE.DATE_TIME",
    "FD_PROCEDURES_ICAR.ADMINISTRATIVE_INFORMATION.NOTICE_PUBLICATION.DATE_OJ",
    "FD_PROCEDURES_ICAR.ADMINISTRATIVE_INFORMATION.ORIGINAL_DISPATCH_DATE",
)
for _date_col in _fd14_date_columns:
    _parts = ", ".join(f"`{_date_col}.{_unit}`" for _unit in ("YEAR", "MONTH", "DAY"))
    fd14 = fd14.withColumn(_date_col, expr(f"make_date({_parts})"))

In [None]:
# Inspection only: print the fd14 schema after the date columns were added.
fd14.printSchema()

In [None]:
# Return to the pandas-on-Spark API (the following cells use drop and
# ps.to_datetime) and print a column summary for inspection.
pd14 = fd14.to_pandas_on_spark()

pd14.info()

In [None]:
# Remove the <base>.DAY / <base>.MONTH / <base>.YEAR component columns for
# each date base name, one drop call per base name.
_corr_root = (
    'FD_COMPLEMENTARY_ICAR.NOTICE_INVOLVES_ICAR.CORRECTION_ADDITIONAL_INFO'
    '.INFORMATION_CORRECTED_ADDED.ORIGINAL_NOTICE_CORRESPONDING_TENDER'
    '.ORIGINAL_NOTICE.CORR'
)
_pd14_date_columns = (
    'FD_COMPLEMENTARY_ICAR.NOTICE_DISPATCH_DATE',
    _corr_root + '.ADD.NEW_VALUE.DATE_TIME',
    _corr_root + '.DELETE.OLD_VALUE.DATE_TIME',
    _corr_root + '.REPLACE.NEW_VALUE.DATE_TIME',
    _corr_root + '.REPLACE.OLD_VALUE.DATE_TIME',
    'FD_PROCEDURES_ICAR.ADMINISTRATIVE_INFORMATION.NOTICE_PUBLICATION.DATE_OJ',
    'FD_PROCEDURES_ICAR.ADMINISTRATIVE_INFORMATION.ORIGINAL_DISPATCH_DATE',
)
for _date_col in _pd14_date_columns:
    _component_columns = [f'{_date_col}.{_unit}' for _unit in ('DAY', 'MONTH', 'YEAR')]
    pd14 = pd14.drop(columns=_component_columns)

In [None]:
# Inspection only: column summary after the date component columns were dropped.
pd14.info()

In [None]:
# Convert each date column of pd14 in place with ps.to_datetime.
_corr_root = (
    "FD_COMPLEMENTARY_ICAR.NOTICE_INVOLVES_ICAR.CORRECTION_ADDITIONAL_INFO"
    ".INFORMATION_CORRECTED_ADDED.ORIGINAL_NOTICE_CORRESPONDING_TENDER"
    ".ORIGINAL_NOTICE.CORR"
)
_pd14_datetime_columns = (
    "FD_COMPLEMENTARY_ICAR.NOTICE_DISPATCH_DATE",
    _corr_root + ".ADD.NEW_VALUE.DATE_TIME",
    _corr_root + ".DELETE.OLD_VALUE.DATE_TIME",
    _corr_root + ".REPLACE.NEW_VALUE.DATE_TIME",
    _corr_root + ".REPLACE.OLD_VALUE.DATE_TIME",
    "FD_PROCEDURES_ICAR.ADMINISTRATIVE_INFORMATION.NOTICE_PUBLICATION.DATE_OJ",
    "FD_PROCEDURES_ICAR.ADMINISTRATIVE_INFORMATION.ORIGINAL_DISPATCH_DATE",
)
for _date_col in _pd14_datetime_columns:
    pd14[_date_col] = ps.to_datetime(pd14[_date_col])

In [None]:
# Inspection only: column summary after the ps.to_datetime conversion.
pd14.info() 

In [None]:
# Convert the cleaned section-14 frame back to Spark and persist it as Parquet.
fd14 = pd14.to_spark()

# Fixed: the target was "s3a_//falk2210/...", which is not a URI and so is not
# routed to the S3A filesystem configured on the session (fs.s3a.* settings).
# The path must use the "s3a://" scheme.
fd14.write.parquet("s3a://falk2210/fd14_210921.parquet")

# 15 Award 

<a id='d15' />

In [None]:
# Load every *.json file under falk/15.json/ into one Spark DataFrame.
d15 = spark.read.json("falk/15.json/*.json")
#d15.printSchema()

In [None]:
# Switch to the pandas-on-Spark API for the column renames and merges below.
pd15 = d15.to_pandas_on_spark()

In [None]:
# Rewrite DOFFIN_ESENDERS_VERSION to DOFFIN_VERSION wherever it occurs in a
# column name (re.sub is not anchored).
# Fixed: the lambda was written `lambda x_ re.sub(...)`, which does not parse.
pd15 = pd15.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd15.info()

In [None]:
# Remove the DOFFIN_ESENDERS_ and FORM_SECTION_ tokens from every column name.
# Fixed: both lambdas read `lambda x_ re.sub(...)`, a SyntaxError; the
# parameter must be followed by a colon.
pd15 = pd15.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

pd15 = pd15.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd15.info()

In [None]:
# Remove the form-name token VOLUNTARY_EX_ANTE_TRANSPARENCY_NOTICE_ from the
# column names.
# Fixed: `lambda x_ re.sub(...)` was a SyntaxError; it is now `lambda x: ...`.
pd15 = pd15.rename(columns=lambda x: re.sub('VOLUNTARY_EX_ANTE_TRANSPARENCY_NOTICE_', '', x))
#pd15.info()

In [None]:
# For each target column, pop its "<target>_P" sibling out of pd15 and merge it
# into the target with Series.update, so only the target column remains.
_p_merge_targets = (
    'FD_AWARD_OF_CONTRACT_DEFENCE_CONTRACT_TITLE',
    'FD_COMPLEMENTARY_INFORMATION_VEAT_ADDITIONAL_INFORMATION',
    'FD_COMPLEMENTARY_INFORMATION_VEAT_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION',
    'FD_OBJECT_VEAT_DESCRIPTION_VEAT_LOCATION_NUTS_LOCATION',
    'FD_OBJECT_VEAT_DESCRIPTION_VEAT_SHORT_CONTRACT_DESCRIPTION',
    'FD_OBJECT_VEAT_DESCRIPTION_VEAT_TITLE_CONTRACT',
    'FD_PROCEDURE_DEFINITION_VEAT_ADMINISTRATIVE_INFORMATION_VEAT_FILE_REFERENCE_NUMBER',
    'FD_PROCEDURE_DEFINITION_VEAT_TYPE_OF_PROCEDURE_DEF_F15_F15_AWARD_WITHOUT_PRIOR_PUBLICATION_ANNEX_D_F15_ANNEX_D1_REASON_CONTRACT_LAWFUL',
    'FD_PROCEDURE_DEFINITION_VEAT_TYPE_OF_PROCEDURE_DEF_F15_F15_PT_NEGOTIATED_WITHOUT_COMPETITION_ANNEX_D_F15_ANNEX_D1_REASON_CONTRACT_LAWFUL',
)
for _target in _p_merge_targets:
    # Kept as a single expression so the target is looked up before the pop.
    pd15[_target].update(pd15.pop(_target + '_P'))

In [None]:
#pd15.info()

In [None]:
# Ordered (pattern, replacement) rules that strip the trailing _P and the
# F15 / VEAT / DEFENCE style markers from the pd15 column names.
# Order matters: each rule sees the output of the rules before it.
_PD15_STRIP_RULES = (
    (r'_P$', ''),  # raw string: same match as the former '\_P$', no invalid escape
    ('F15_', ''),
    ('_F15', ''),
    ('CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', ''),
    ('DOFFIN_FORM_TYPE_', ''),
    ('ORGANISATION_', ''),
    ('_VEAT', ''),
    ('_DEFENCE', ''),
)


def _strip_pd15_markers(name: str) -> str:
    """Return *name* after applying every rule in _PD15_STRIP_RULES in order."""
    for pattern, replacement in _PD15_STRIP_RULES:
        name = re.sub(pattern, replacement, name)
    return name


# Fixed: the original lambdas read `lambda x_ re.sub(...)`, a SyntaxError.
# A single rename pass replaces the eight chained ones; the resulting names are
# the same because every column name still goes through the rules in order.
pd15 = pd15.rename(columns=_strip_pd15_markers)


In [None]:
# Ordered (pattern, replacement) rules that shorten / align the pd15 column
# names.  Order matters: each rule sees the output of the rules before it.
_PD15_SIMPLIFY_RULES = (
    ('ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_', ''),
    ('FD_PROCEDURES_ADMINISTRATIVE_INFO_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F6', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE'),
    ('PREVIOUS_PUBLICATION_NOTICE_F6', 'PREVIOUS_PUBLICATION_NOTICE'),
    ('PREVIOUS_NOTICE_BUYER_PROFILE_F6', 'PREVIOUS_NOTICE_BUYER_PROFILE'),
    ('ENTITY_', ''),
    ('FD_AWARD_CONTRACT_MANDATORY_INFORMATION_NOT_INTENDED_PUBLICATION_', ''),
    ('_DEF_', ''),
    ('E_MAILS_', ''),
    ('FD_CONTRACTING_AUTHORITY_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_'),
    # NOTE(review): the pattern has no leading "F", so "FD_OBJECT_DESCRIPTION_"
    # becomes "FFD_OBJECT_CONTRACT_...".  A later rename in this section matches
    # on that "FFD_" spelling, so the pattern is deliberately left unchanged.
    ('D_OBJECT_DESCRIPTION_', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_'),
    ('FD_PROCEDURE_DEFINITION_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_PREVIOUS_PUBLICATION_EXISTS_', 'FD_PREVIOUS_PUBLICATION_EXISTS_'),
)


def _simplify_pd15_name(name: str) -> str:
    """Return *name* after applying every rule in _PD15_SIMPLIFY_RULES in order."""
    for pattern, replacement in _PD15_SIMPLIFY_RULES:
        name = re.sub(pattern, replacement, name)
    return name


# Fixed: the original lambdas read `lambda x_ re.sub(...)`, a SyntaxError.
# A single rename pass replaces the chained ones; the resulting names are the
# same because every column name still goes through the rules in order.
pd15 = pd15.rename(columns=_simplify_pd15_name)

In [None]:
#pd15.info()

In [None]:
# Merge each "lodging information for service" contact column into the
# matching "appeal procedure body responsible" column, popping the source.
_review_body = 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_'
_lodging_service = 'FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_INFORMATION_FOR_SERVICE_'
_contact_fields = (
    'ADDRESS',
    'COUNTRY_VALUE',
    'E_MAIL',
    'FAX',
    'NATIONALID',
    'OFFICIALNAME',
    'PHONE',
    'POSTAL_CODE',
    'TOWN',
    'URL',
)
for _field in _contact_fields:
    # Kept as a single expression so the target is looked up before the pop.
    pd15[_review_body + _field].update(pd15.pop(_lodging_service + _field))

# Merge the ANNEX_D2 / ANNEX_D3 variants of these columns into their ANNEX_D1
# counterpart, popping each source column.  The negotiated-procedure D1 column
# receives two merges in sequence (D2 first, then D3).
_type_of_procedure = 'FD_PROCEDURE_DEFINITION_TYPE_OF_PROCEDURE'
_award_annex = _type_of_procedure + 'AWARD_WITHOUT_PRIOR_PUBLICATION_ANNEX_D_ANNEX_'
_negotiated_annex = _type_of_procedure + 'PT_NEGOTIATED_WITHOUT_COMPETITION_ANNEX_D_ANNEX_'
_annex_merges = (
    (_award_annex, 'D3', '_NO_OPEN_RESTRICTED_VALUE'),
    (_award_annex, 'D3', '_REASON_CONTRACT_LAWFUL'),
    (_negotiated_annex, 'D2', '_REASON_CONTRACT_LAWFUL'),
    (_negotiated_annex, 'D3', '_REASON_CONTRACT_LAWFUL'),
)
for _prefix, _source_annex, _leaf in _annex_merges:
    pd15[_prefix + 'D1' + _leaf].update(pd15.pop(_prefix + _source_annex + _leaf))

In [None]:
# Ordered (pattern, replacement) rules that map the cleaned pd15 column names
# onto the target naming scheme (AWARD_CONTRACT_*, AWARDED_CONTRACT_*,
# COMPLEMENTARY_INFO_*, CONTRACTING_BODY_*, OBJECT_CONTRACT_*, PROCEDURE_*).
# Order matters: each rule sees the output of the rules before it, and the
# specific patterns come before the generic prefixes that would otherwise
# swallow them.
#
# Target names from the original notes that still have no source mapping:
#   CONTRACTING_BODY_ADDRESS_FURTHER_INFO_, LEFTI_ (LEFTI_PARTICULAR_PROFESSION_CTYPE),
#   OBJECT_CONTRACT_OBJECT_DESCR_ (CPV_ADDITIONAL, ITEM),
#   PROCEDURE_ (PROCEDURE_DATE_RECEIPT_TENDERS), CONTRACTING_BODY_URL_DOCUMENT,
#   CONTRACTING_BODY_URL_TOOL, PROCEDURE_URL_NATIONAL_PROCEDURE
_PD15_TARGET_RULES = (
    ('FD_AWARD_OF_CONTRACT_ECONOMIC_OPERATOR_NAME_ADDRESS_', 'AWARDED_CONTRACT_CONTRACTOR_ADDRESS_CONTRACTOR_'),
    ('FD_AWARD_OF_CONTRACT_', 'AWARD_CONTRACT_'),  # ITEM
    ('FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_MEDIATION_PROCEDURE_BODY_RESPONSIBLE_', 'COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_', 'COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_'),
    ('FD_CONTRACTING_AUTHORITY_TYPE_AND_ACTIVITIES_OR_CONTRACTING_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'CONTRACTING_BODY_ADDRESS_PARTICIPATION_'),
    ('FD_CONTRACTING_AUTHORITY_TYPE_AND_ACTIVITIES_OR_CONTRACTING_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_TYPE_OF_CONTRACTING_AUTHORITY_', 'CONTRACTING_BODY_'),  # CONTRACTING_BODY_CA_ACTIVITY_VALUE
    # The "FFD_" spelling is intentional: an earlier rename in this section
    # produces names with that doubled "F".  The original listed this rule
    # twice; the exact repeat was dropped.
    ('FFD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_', 'OBJECT_CONTRACT_'),  # OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE, OBJECT_CONTRACT_TITLE
    ('OBJECT_CONTRACT_TITLE_CONTRACT', 'OBJECT_CONTRACT_OBJECT_DESCR_TITLE'),
    ('OBJECT_CONTRACT_SHORT_CONTRACT_DESCRIPTION', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_WITH_EXCLUDING_VAT_VALUE_CURRENCY', 'AWARDED_CONTRACT_VAL_SUBCONTRACTING_CURRENCY'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_WITH_EXCLUDING_VAT_VALUE_text', 'AWARDED_CONTRACT_VAL_SUBCONTRACTING_text'),

    ('FD_OBJECT_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'OBJECT_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('FD_OBJECT_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_HIGH'),
    ('FD_OBJECT_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_LOW'),
    ('FD_OBJECT_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_VALUE_COST', 'OBJECT_CONTRACT_VAL_TOTAL_text'),

    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_INITIAL_ESTIMATED_TOTAL_VALUE_CONTRACT_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_INITIAL_ESTIMATED_TOTAL_VALUE_CONTRACT_VALUE_COST', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text'),
    ('FD_CONTRACTING_AUTHORITY_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_PARTICIPATE', 'CONTRACTING_BODY_URL_PARTICIPATION'),
    ('FD_CONTRACTING_AUTHORITY_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_URL_GENERAL', 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_URL_GENERAL'),
    ('FD_CONTRACTING_AUTHORITY_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_', 'CONTRACTING_BODY_'),
    ('FD_PROCEDURE_DEFINITION_AWARD_CRITERIA_INFORMATION_AWARD_CRITERIA_DETAIL_MOST_ECONOMICALLY_ADVANTAGEOUS_TENDER_SHORT_CRITERIA_DEFINITION_', 'PROCEDURE_'),
    ('AWARD_CONTRACT_CONTRACT_TITLE', 'AWARD_CONTRACT_TITLE'),
    ('AWARD_CONTRACT_LOT_NUMBER', 'AWARD_CONTRACT_LOT_NO'),
    ('FD_COMPLEMENTARY_INFORMATION_NOTICE_DISPATCH_DATE_', 'COMPLEMENTARY_INFO_'),
    ('FD_COMPLEMENTARY_INFORMATION_ADDITIONAL_INFORMATION', 'COMPLEMENTARY_INFO_INFO_ADD'),
    ('FD_COMPLEMENTARY_INFORMATION_RELATES_TO_EU_PROJECT_YES', 'OBJECT_CONTRACT_OBJECT_DESCR_EU_PROGR_RELATED'),
    ('FD_PROCEDURE_DEFINITION_ADMINISTRATIVE_INFORMATION_FILE_REFERENCE_NUMBER', 'OBJECT_CONTRACT_REFERENCE_NUMBER'),
    ('FD_CONTRACTING_AUTHORITY_TYPE_AND_ACTIVITIES_OR_CONTRACTING_AND_PURCHASING_ON_BEHALF_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_WITH_', 'AWARD_CONTRACT_CONTRACT_LIKELY_SUB_CONTRACTED_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_TYPE_AND_ACTIVITIES_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_ACTIVITIES_OF_CONTRACTING_ENTITY_', 'FD_CONTRACTING_ACTIVITIES_OF_CONTRACTING_ENTITY_'),
)


def _to_target_schema_name(name: str) -> str:
    """Return *name* after applying every rule in _PD15_TARGET_RULES in order."""
    for pattern, replacement in _PD15_TARGET_RULES:
        name = re.sub(pattern, replacement, name)
    return name


# Fixed: the original lambdas read `lambda x_ re.sub(...)`, a SyntaxError.
# A single rename pass replaces the chained ones; the resulting names are the
# same because every column name still goes through the rules in order.
pd15 = pd15.rename(columns=_to_target_schema_name)

# Merge the contract-type columns that lack the LOCATION_ segment into their
# LOCATION_ counterparts, popping the source column each time.
_type_contract = 'OBJECT_CONTRACT_TYPE_CONTRACT_'
for _leaf in ('SERVICE_CATEGORY', 'TYPE_CONTRACT_VALUE', 'TYPE_SUPPLIES_CONTRACT_VALUE'):
    pd15[_type_contract + 'LOCATION_' + _leaf].update(pd15.pop(_type_contract + _leaf))

In [None]:
# Inspection only: column summary of pd15 after the renames and merges above.
pd15.info()

In [None]:
#fd15 = pd15.to_spark()

In [None]:
"""
fd15 = fd15.withColumn("FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE", expr("make_date(`FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.YEAR`, `FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.MONTH`, `FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.DAY`)"))
fd15 = fd15.withColumn("FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE.DAY`)"))
fd15 = fd15.withColumn("FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ.DAY`)"))
fd15 = fd15.withColumn("FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.DAY`)"))
fd15 = fd15.withColumn("FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ.DAY`)"))
"""

In [None]:
#fd15.printSchema()

In [None]:

#pd15 = fd15.to_pandas_on_spark()

#pd15.info()

In [None]:
"""
pd15 = pd15.drop(columns=['FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.DAY', 'FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.MONTH', 'FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.YEAR'])
pd15 = pd15.drop(columns=['FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE.YEAR'])
pd15 = pd15.drop(columns=['FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ.YEAR'])
pd15 = pd15.drop(columns=['FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ.YEAR'])
pd15 = pd15.drop(columns=['FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ.YEAR'])
"""

In [None]:
#pd15.info()

In [None]:
"""
pd15["FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE"]= ps.to_datetime(pd15["FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE"])
pd15["FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd15["FD_COMPLEMENTARY_INFORMATION_VEAT.NOTICE_DISPATCH_DATE"])
pd15["FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ"]= ps.to_datetime(pd15["FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.CNT_NOTICE_INFORMATION.DATE_OJ"])
pd15["FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ"]= ps.to_datetime(pd15["FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.OTHER_PREVIOUS_PUBLICATIONS.OTHER_PREVIOUS_PUBLICATION.DATE_OJ"])
pd15["FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ"]= ps.to_datetime(pd15["FD_PROCEDURE_DEFINITION_VEAT.ADMINISTRATIVE_INFORMATION_VEAT.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F15.PREVIOUS_PUBLICATION_EXISTS_F15.PREVIOUS_PUBLICATION_NOTICE_F15.DATE_OJ"])
"""

In [None]:
#pd15.info() 

In [None]:
# Convert pd15 into a Spark DataFrame (fd15) so the DataFrame writer can be used on it.
fd15 = pd15.to_spark()

In [None]:
#fd15.write.parquet("s3a://falk2210/fd15_210921.parquet")

In [None]:
# Write fd15 as JSON to the relative path falk2210/f_award.json; mode "append"
# adds to whatever is already stored there instead of failing or overwriting.
fd15.write.mode("append").json("falk2210/f_award.json")

# 16 PRI 

<a id='d16' />

In [None]:
# Read every *.json file under falk/16.json into one Spark DataFrame.
d16 = spark.read.json("falk/16.json/*.json")
#d16.printSchema()

In [None]:
# Expose the Spark DataFrame through the pandas-on-Spark API (used for the renames that follow).
pd16 = d16.to_pandas_on_spark()

In [None]:
# Shorten 'DOFFIN_ESENDERS_VERSION' to 'DOFFIN_VERSION' in every column name.
# (The lambda previously lacked the ':' after its parameter, which is a syntax error.)
pd16 = pd16.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd16.info()

In [None]:
# Strip the remaining 'DOFFIN_ESENDERS_' and 'FORM_SECTION_' fragments from every column name.
# (The lambdas previously lacked the ':' after their parameter, which is a syntax error.)
pd16 = pd16.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

pd16 = pd16.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd16.info()

In [None]:
# Remove the form-specific 'PRIOR_INFORMATION_DEFENCE_' fragment from every column name.
# (The lambda previously lacked the ':' after its parameter, which is a syntax error.)
pd16 = pd16.rename(columns=lambda x: re.sub('PRIOR_INFORMATION_DEFENCE_', '', x))
#pd16.info()

In [None]:
#pd16 = pd16.rename(columns=lambda x: re.sub('_', '_', x))
#pd16.info()

In [None]:
# Drop a trailing '_P' from every column name.
# Fixes: the lambda lacked its ':' (syntax error), and '\_' was an invalid escape in a
# non-raw string; r'_P$' is the same pattern written as a raw string.
pd16 = pd16.rename(columns=lambda x: re.sub(r'_P$', '', x))
#pd16.info()

In [None]:
# Ordered (pattern, replacement) rewrites applied to every F16 column name.
# Order matters: later patterns are written against the names produced by earlier ones.
# The original cell issued one rename per entry, each with a lambda missing its ':'
# (a syntax error); the rewrites are now applied in the same order inside a single rename.
_F16_COLUMN_REWRITES = [
    ('PRIOR_INFORMATION_', ''),
    ('SUPPLIES_SERVICES_', ''),
    # NOTE(review): '_OTH' also matches inside '_OTHER...' and inside 'FD_OTH_INFO_...';
    # a later cell still refers to `FD_OTH_INFO_NOTICE_DISPATCH_DATE.*` - confirm this is intended.
    ('_OTH', ''),
    ('F16_', ''),
    ('FD_OBJECT_WORKS_QUANTITY_SCOPE_WORKS_DEFENCE_DIVISION_INTO_LOTS_DIV_INTO_LOT_YES_', 'FD_OBJECT_DIVISION_INTO_LOTS_DIV_INTO_LOT_YES_'),
    ('FD_OBJECT_WORKS_QUANTITY_SCOPE_WORKS_DEFENCE', 'FD_OBJECT_TOTAL_QUANTITY_OR_SCOPE'),
    # NOTE(review): pattern and replacement are identical, so this entry changes nothing.
    ('FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_'),
    ('FD_AUTHORITY_TYPE_AND_ACTIVITIES_OR_CONTRACTING_ENTITY_AND_PURCHASING_ON_BEHALF_', 'FD_AUTHORITY_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_'),
    ('FD_OBJECT_WORKS_ADDITIONAL_INFORMATION', 'FD_OBJECT_ADDITIONAL_INFORMATION'),
    ('FD_OBJECT_WORKS_TITLE_CONTRACT', 'FD_OBJECT_TITLE_CONTRACT'),
    ('FD_OBJECT_WORKS_TYPE_CONTRACT_PLACE_DELIVERY_DEFENCE_SITE_OR_LOCATION', 'FD_OBJECT_TYPE_CONTRACT_PLACE_DELIVERY_SITE_OR_LOCATION'),
    ('FD_OBJECT_WORKS_TYPE_CONTRACT_PLACE_DELIVERY_DEFENCE_TYPE_CONTRACT_PI_DEFENCE_SERVICE_CATEGORY_DEFENCE', 'FD_OBJECT_TYPE_CONTRACT_PLACE_DELIVERY_SERVICE_CATEGORY'),
    ('FD_OBJECT_WORKS_', 'FD_OBJECT_'),
    ('FD_OBJECT_TOTAL_QUANTITY_OR_SCOPE_COSTS_RANGE_AND_CURRENCY_', 'FD_OBJECT_COSTS_RANGE_AND_CURRENCY_'),
    # NOTE(review): probably never matches - the 'FD_OBJECT_WORKS_' rewrite two entries
    # above has already removed that prefix from the names.
    ('FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PROCEDURE_DATE_STARTING_', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_START_DATE_'),
    ('FD_OBJECT_TOTAL_QUANTITY_OR_SCOPE_TOTAL_QUANTITY_OR_SCOPE', 'FD_OBJECT_TOTAL_QUANTITY_OR_SCOPE'),
    ('FD_OBJECT_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE'),
    ('FD_OBJECT_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_MONTHS', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_MONTHS'),
    ('FD_OBJECT_SCHEDULED_DATE_PERIOD_PROCEDURE_DATE_STARTING_', 'FD_OBJECT_PROCEDURE_DATE_STARTING_'),
    ('FD_OBJECT_TYPE_CONTRACT_PLACE_DELIVERY_DEFENCE_TYPE_CONTRACT_PI_DEFENCE_TYPE_CONTRACT_VALUE', 'FD_OBJECT_TYPE_CONTRACT_PLACE_DELIVERY_TYPE_CONTRACT_VALUE'),
]


def _rewrite_f16_column(name):
    """Return *name* after applying every F16 rewrite in table order."""
    for pattern, replacement in _F16_COLUMN_REWRITES:
        name = re.sub(pattern, replacement, name)
    return name


# One rename with the composed mapper yields the same final names as one rename per entry.
pd16 = pd16.rename(columns=_rewrite_f16_column)

In [None]:
# Print the column summary of pd16 to check the renamed columns.
pd16.info()

In [None]:
# Back to a plain Spark DataFrame; the next cell uses withColumn/expr on it.
fd16 = pd16.to_spark()

In [None]:
# For each date field, add a column named after the field whose value is
# make_date(<field>.YEAR, <field>.MONTH, <field>.DAY). The part columns stay in fd16.
f16_date_columns = (
    "FD_OTH_INFO_NOTICE_DISPATCH_DATE",
    "FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING",
)
for date_col in f16_date_columns:
    # Each part is referenced as a backtick-quoted column name because the names contain dots.
    part_refs = ", ".join("`{}.{}`".format(date_col, part) for part in ("YEAR", "MONTH", "DAY"))
    fd16 = fd16.withColumn(date_col, expr("make_date({})".format(part_refs)))

In [None]:
# Print the schema of fd16 to check the newly added date columns.
fd16.printSchema()

In [None]:
# Return to the pandas-on-Spark API; pd16 now includes the date columns added to fd16.
pd16 = fd16.to_pandas_on_spark()

#pd16.info()

In [None]:
# Remove the .DAY/.MONTH/.YEAR helper columns of every date field from pd16.
f16_date_columns = (
    'FD_OTH_INFO_NOTICE_DISPATCH_DATE',
    'FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE',
    'FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE',
    'FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING',
)
for date_col in f16_date_columns:
    # One drop per date field, listing the parts in DAY, MONTH, YEAR order.
    pd16 = pd16.drop(columns=[date_col + suffix for suffix in ('.DAY', '.MONTH', '.YEAR')])

In [None]:
# Print the column summary of pd16 after the date-part columns were dropped.
pd16.info()

In [None]:
# Convert each assembled date column of pd16 with ps.to_datetime, in place.
f16_date_columns = (
    "FD_OTH_INFO_NOTICE_DISPATCH_DATE",
    "FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OBJECT_WORKS_SUPPLIES_SERVICES_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING",
)
for date_col in f16_date_columns:
    pd16[date_col] = ps.to_datetime(pd16[date_col])

In [None]:
# Print the column summary of pd16 to check the converted date columns.
pd16.info() 

In [None]:
# pd16 was changed after fd16 was created (date-part columns dropped, dates converted),
# so rebuild fd16 from pd16 before writing; otherwise the output would still carry the
# .DAY/.MONTH/.YEAR columns. The F15 and F17 sections do the same before their writes.
fd16 = pd16.to_spark()
# Fixed the URI scheme: "s3a_//" is not a filesystem scheme, "s3a://" is the S3A connector.
fd16.write.parquet("s3a://falk2210/fd16_210921.parquet")

In [None]:
# Append fd16 as JSON under s3a://falk2210/_pri.json; mode 'append' keeps rows already there.
# Fixed the URI scheme: "s3a_//" is not a filesystem scheme, "s3a://" is the S3A connector.
fd16.write.mode('append').json("s3a://falk2210/_pri.json")

In [None]:
# Stack pd1 (built earlier in the notebook) and pd16 row-wise; ignore_index=True
# discards both original indexes and assigns a fresh one.
pri = ps.concat([pd1, pd16], ignore_index=True)

In [None]:
# Print the column summary of the combined frame.
pri.info()

# 17 CONTRACT 

<a id='d17' />

In [None]:
# Read every *.json file under falk/17.json into one Spark DataFrame.
d17 = spark.read.json("falk/17.json/*.json")
#d17.printSchema()

In [None]:
# Expose the Spark DataFrame through the pandas-on-Spark API (used for the renames that follow).
pd17 = d17.to_pandas_on_spark()

In [None]:
# Shorten 'DOFFIN_ESENDERS_VERSION' to 'DOFFIN_VERSION' in every column name.
# (The lambda previously lacked the ':' after its parameter, which is a syntax error.)
pd17 = pd17.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd17.info()

In [None]:
# Strip the remaining 'DOFFIN_ESENDERS_' and 'FORM_SECTION_' fragments from every column name.
# (The lambdas previously lacked the ':' after their parameter, which is a syntax error.)
pd17 = pd17.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

pd17 = pd17.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd17.info()

In [None]:
# Remove the form-specific 'CONTRACT_DEFENCE_' fragment from every column name.
# (The lambda previously lacked the ':' after its parameter, which is a syntax error.)
pd17 = pd17.rename(columns=lambda x: re.sub('CONTRACT_DEFENCE_', '', x))
#pd17.info()

In [None]:
#pd17 = pd17.rename(columns=lambda x: re.sub('_', '_', x))
#pd17.info()

In [None]:
# Columns that have a twin named '<column>_P'. For each one the twin is removed
# from pd17 (pop) and its values are applied to the base column via Series.update.
f17_columns_with_p_twin = [
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_ADDITIONAL_INFORMATION',
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION',
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_RECURRENT_PROCUREMENT',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_DEPOSITS_GUARANTEES_REQUIRED',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_EXISTENCE_OTHER_PARTICULAR_CONDITIONS',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_LEGAL_FORM',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS_MAIN_FINANCING_CONDITIONS',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_ECONOMIC_OPERATORS_PERSONAL_SITUATION',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_ECONOMIC_OPERATORS_PERSONAL_SITUATION_SUBCONTRACTORS',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_F17_ECONOMIC_FINANCIAL_CAPACITY_EAF_CAPACITY_INFORMATION',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_F17_ECONOMIC_FINANCIAL_CAPACITY_EAF_CAPACITY_MIN_LEVEL',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_F17_ECONOMIC_FINANCIAL_CAPACITY_SUBCONTRACTORS_EAF_CAPACITY_INFORMATION',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_F17_ECONOMIC_FINANCIAL_CAPACITY_SUBCONTRACTORS_EAF_CAPACITY_MIN_LEVEL',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_T_CAPACITY_INFORMATION',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_T_CAPACITY_MIN_LEVEL',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_SUBCONTRACTORS_T_CAPACITY_INFORMATION',
    'FD_LEFTI_F17_CONDITIONS_FOR_PARTICIPATION_TECHNICAL_CAPACITY_LEFTI_SUBCONTRACTORS_T_CAPACITY_MIN_LEVEL',
    'FD_LEFTI_SERVICES_CONTRACTS_SPECIFIC_CONDITIONS_EXECUTION_SERVICE_RESERVED_PARTICULAR_PROFESSION',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_DESCRIPTION_CONTRACT_INFORMATION_DEFENCE_F17_DIVISION_INTO_LOTS_F17_DIV_INTO_LOT_YES_F17_ANNEX_B_ADDITIONAL_INFORMATION_ABOUT_LOTS',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_DESCRIPTION_CONTRACT_INFORMATION_DEFENCE_F17_DIVISION_INTO_LOTS_F17_DIV_INTO_LOT_YES_F17_ANNEX_B_LOT_DESCRIPTION',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_DESCRIPTION_CONTRACT_INFORMATION_DEFENCE_F17_DIVISION_INTO_LOTS_F17_DIV_INTO_LOT_YES_F17_ANNEX_B_NATURE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_DESCRIPTION_CONTRACT_INFORMATION_DEFENCE_F17_FRAMEWORK_TOTAL_ESTIMATED_FREQUENCY_AWARDED_CONTRACTS',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_DESCRIPTION_CONTRACT_INFORMATION_DEFENCE_LOCATION_NUTS_LOCATION',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_DESCRIPTION_CONTRACT_INFORMATION_DEFENCE_SHORT_CONTRACT_DESCRIPTION',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_DESCRIPTION_CONTRACT_INFORMATION_DEFENCE_TITLE_CONTRACT',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_QUANTITY_SCOPE_NATURE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE_QUANTITY_SCOPE_OPTIONS_OPTION_DESCRIPTION',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE_FILE_REFERENCE_NUMBER',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE_LANGUAGE_LANGUAGE_OTHER',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE_TYPE_OF_PROCEDURE_DEFENCE_MAXIMUM_NUMBER_INVITED_OPE_OBJECTIVE_CRITERIA',
]
for base_column in f17_columns_with_p_twin:
    # The base column is looked up first, then the twin is popped - same order as before.
    pd17[base_column].update(pd17.pop(base_column + '_P'))

In [None]:
# Drop a trailing '_P' from every remaining column name, then print the column summary.
# Fixes: the lambda lacked its ':' (syntax error), and '\_' was an invalid escape in a
# non-raw string; r'_P$' is the same pattern written as a raw string.
pd17 = pd17.rename(columns=lambda x: re.sub(r'_P$', '', x))
pd17.info()

In [None]:
# Back to a plain Spark DataFrame; the next cell uses withColumn/expr on it.
fd17 = pd17.to_spark()

In [None]:
# For each date field, add a column named after the field whose value is
# make_date(<field>.YEAR, <field>.MONTH, <field>.DAY). The part columns stay in fd17.
f17_date_columns = (
    "FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE.NOTICE_DISPATCH_DATE",
    "FD_LEFTI_CONTRACT_RELATING_CONDITIONS.CLEARING_LAST_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION_DEFENCE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION_DEFENCE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.DISPATCH_INVITATIONS_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F17.PREVIOUS_PUBLICATION_EXISTS_F17.PREVIOUS_PUBLICATION_NOTICE_F17.DATE_OJ",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.RECEIPT_LIMIT_DATE",
)
for date_col in f17_date_columns:
    # Each part is referenced as a backtick-quoted column name because the names contain dots.
    part_refs = ", ".join("`{}.{}`".format(date_col, part) for part in ("YEAR", "MONTH", "DAY"))
    fd17 = fd17.withColumn(date_col, expr("make_date({})".format(part_refs)))

In [None]:
# Print the schema of fd17 to check the newly added date columns.
fd17.printSchema()

In [None]:
# Return to the pandas-on-Spark API (pd17 now includes the date columns added to fd17)
# and print its column summary.
pd17 = fd17.to_pandas_on_spark()

pd17.info()

In [None]:
# Remove the .DAY/.MONTH/.YEAR helper columns of every date field from pd17.
f17_date_columns = (
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE.NOTICE_DISPATCH_DATE',
    'FD_LEFTI_CONTRACT_RELATING_CONDITIONS.CLEARING_LAST_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE',
    'FD_OBJECT_CONTRACT_INFORMATION_DEFENCE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.DISPATCH_INVITATIONS_DATE',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F17.PREVIOUS_PUBLICATION_EXISTS_F17.PREVIOUS_PUBLICATION_NOTICE_F17.DATE_OJ',
    'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.RECEIPT_LIMIT_DATE',
)
for date_col in f17_date_columns:
    # One drop per date field, listing the parts in DAY, MONTH, YEAR order.
    pd17 = pd17.drop(columns=[date_col + suffix for suffix in ('.DAY', '.MONTH', '.YEAR')])

In [None]:
# Print the column summary of pd17 after the date-part columns were dropped.
pd17.info()

In [None]:
# Convert each assembled date column of pd17 with ps.to_datetime, in place.
f17_date_columns = (
    "FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE.NOTICE_DISPATCH_DATE",
    "FD_LEFTI_CONTRACT_RELATING_CONDITIONS.CLEARING_LAST_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION_DEFENCE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_CONTRACT_INFORMATION_DEFENCE.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.DISPATCH_INVITATIONS_DATE",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F17.PREVIOUS_PUBLICATION_EXISTS_F17.PREVIOUS_PUBLICATION_NOTICE_F17.DATE_OJ",
    "FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_DEFENCE.RECEIPT_LIMIT_DATE",
)
for date_col in f17_date_columns:
    pd17[date_col] = ps.to_datetime(pd17[date_col])

In [None]:
# Print the column summary of pd17 to check the converted date columns.
pd17.info() 

In [None]:
# Rebuild the Spark DataFrame from the cleaned pd17 and store it as Parquet.
fd17 = pd17.to_spark()

# Fixed the URI scheme: "s3a_//" is not a filesystem scheme, "s3a://" is the S3A connector.
fd17.write.parquet("s3a://falk2210/fd17_210921.parquet")

# 18 AWARD 

<a id='d18' />

In [None]:
# Read every *.json file under falk/18.json into one Spark DataFrame.
d18 = spark.read.json("falk/18.json/*.json")
#d18.printSchema()

In [None]:
# Expose the Spark DataFrame through the pandas-on-Spark API (used for the renames that follow).
pd18 = d18.to_pandas_on_spark()

In [None]:
# Shorten 'DOFFIN_ESENDERS_VERSION' to 'DOFFIN_VERSION' in every column name.
# (The lambda previously lacked the ':' after its parameter, which is a syntax error.)
pd18 = pd18.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd18.info()

In [None]:
# Strip the remaining 'DOFFIN_ESENDERS_' and 'FORM_SECTION_' fragments from every column name.
# (The lambdas previously lacked the ':' after their parameter, which is a syntax error.)
pd18 = pd18.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

pd18 = pd18.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd18.info()

In [None]:
# Remove the form-specific 'CONTRACT_AWARD_DEFENCE_' fragment from every column name.
# (The lambda previously lacked the ':' after its parameter, which is a syntax error.)
pd18 = pd18.rename(columns=lambda x: re.sub('CONTRACT_AWARD_DEFENCE_', '', x))

#pd18.info()

In [None]:
# Columns that have a twin named '<column>_P'. For each one the twin is removed
# from pd18 (pop) and its values are applied to the base column via Series.update.
f18_columns_with_p_twin = [
    'FD_AWARD_OF_CONTRACT_DEFENCE_CONTRACT_TITLE',
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_ADDITIONAL_INFORMATION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DEFENCE_DESCRIPTION_AWARD_NOTICE_INFORMATION_DEFENCE_LOCATION_NUTS_LOCATION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DEFENCE_DESCRIPTION_AWARD_NOTICE_INFORMATION_DEFENCE_SHORT_CONTRACT_DESCRIPTION',
    'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DEFENCE_DESCRIPTION_AWARD_NOTICE_INFORMATION_DEFENCE_TITLE_CONTRACT',
    'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE_ADMINISTRATIVE_INFORMATION_FILE_REFERENCE_NUMBER',
    'FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION',
]
for base_column in f18_columns_with_p_twin:
    # The base column is looked up first, then the twin is popped - same order as before.
    pd18[base_column].update(pd18.pop(base_column + '_P'))

In [None]:
# Ordered (pattern, replacement) rewrites applied to every F18 column name.
# Order matters: later patterns are written against the names produced by earlier ones.
# The original cell issued one rename per entry, each with a lambda missing its ':'
# (a syntax error); the rewrites are now applied in the same order inside a single rename.
_F18_COLUMN_REWRITES = [
    # r'_P$' replaces '\_P$', whose '\_' was an invalid escape in a non-raw string.
    (r'_P$', ''),
    ('F18_', ''),
    ('_F18', ''),
    ('CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', ''),
    ('DOFFIN_FORM_TYPE_', ''),
    ('ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_', ''),
    ('ORGANISATION_', ''),
    # NOTE(review): the next three patterns contain 'F18', which the 'F18_' / '_F18'
    # rewrites above have already stripped, so they probably never match.
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18_PREVIOUS_PUBLICATION_EXISTS_F18', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_PREVIOUS_PUBLICATION_EXISTS'),
    ('PREVIOUS_PUBLICATION_NOTICE_F18', 'PREVIOUS_PUBLICATION_NOTICE'),
    ('PREVIOUS_NOTICE_BUYER_PROFILE_F18', 'PREVIOUS_NOTICE_BUYER_PROFILE'),
    ('AUTHORITY_INFORMATION_', ''),
    ('E_MAILS_', ''),
    ('_DEFENCE', ''),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CONTRACT_AWARD_CA_CE_CONCESSIONAIRE_PROFILE_', 'FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_'),
    # NOTE(review): pattern and replacement are identical, so this entry changes nothing.
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_PREVIOUS_PUBLICATION_EXISTS_', 'FD_PREVIOUS_PUBLICATION_EXISTS_'),
]


def _rewrite_f18_column(name):
    """Return *name* after applying every F18 rewrite in table order."""
    for pattern, replacement in _F18_COLUMN_REWRITES:
        name = re.sub(pattern, replacement, name)
    return name


# One rename with the composed mapper yields the same final names as one rename per entry.
pd18 = pd18.rename(columns=_rewrite_f18_column)

In [None]:
# For each address field, remove the LODGING_INFORMATION_FOR_SERVICE_* column from pd18
# (pop) and apply its values to the matching APPEAL_PROCEDURE_BODY_RESPONSIBLE_* column
# via Series.update.
appeal_prefix = 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_PROCEDURES_FOR_APPEAL_'
appeal_address_fields = (
    'ADDRESS',
    'COUNTRY_VALUE',
    'E_MAIL',
    # 'FAX',  # this pair is disabled and therefore not merged
    'NATIONALID',
    'OFFICIALNAME',
    'PHONE',
    'POSTAL_CODE',
    'TOWN',
    'URL',
)
for field_name in appeal_address_fields:
    responsible_col = appeal_prefix + 'APPEAL_PROCEDURE_BODY_RESPONSIBLE_' + field_name
    lodging_col = appeal_prefix + 'LODGING_INFORMATION_FOR_SERVICE_' + field_name
    # The target column is looked up first, then the source is popped - same order as before.
    pd18[responsible_col].update(pd18.pop(lodging_col))

In [None]:
# Map the form-18 column names onto the common award-notice naming scheme.
# Every (pattern, replacement) pair is applied to each column name with
# re.sub, strictly in the order listed: several rules rely on a more specific
# pattern being rewritten before a shorter prefix of it is.
#
# Fixes relative to the previous version of this cell:
#   * the lambdas were written `lambda x_ re.sub(...)`, which is a
#     SyntaxError -- the parameter list must be terminated by a colon;
#   * a second rule for 'FD_AWARD_OF_CONTRACT_' (-> 'OBJECT_CONTRACT_OBJECT_DESCR_')
#     has been removed: the rule 'FD_AWARD_OF_CONTRACT_' -> 'AWARD_CONTRACT_'
#     near the top already rewrites every occurrence, so it could never match.
#
# Target prefixes that still have no source pattern mapped (they were
# commented-out placeholders with an empty pattern):
#   COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_, CONTRACTING_BODY_ADDRESS_FURTHER_INFO_,
#   LEFTI_, OBJECT_CONTRACT_OBJECT_DESCR_, PROCEDURE_,
#   CONTRACTING_BODY_URL_DOCUMENT, CONTRACTING_BODY_URL_TOOL,
#   PROCEDURE_URL_NATIONAL_PROCEDURE
pd18_award_rules = [
    ('FD_AWARD_OF_CONTRACT_ECONOMIC_OPERATOR_NAME_ADDRESS_', 'AWARDED_CONTRACT_CONTRACTOR_ADDRESS_CONTRACTOR_'),
    ('FD_AWARD_OF_CONTRACT_', 'AWARD_CONTRACT_'),  # ITEM
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_PROCEDURES_FOR_APPEAL_MEDIATION_PROCEDURE_BODY_RESPONSIBLE_', 'COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_PROCEDURES_FOR_APPEAL_APPEAL_PROCEDURE_BODY_RESPONSIBLE_', 'COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_'),
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_NOTICE_DISPATCH_DATE_', 'COMPLEMENTARY_INFO_'),  # COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_OR_CONTRACTING_ENTITY_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'CONTRACTING_BODY_ADDRESS_PARTICIPATION_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_OR_CONTRACTING_ENTITY_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_TYPE_OF_CONTRACTING_AUTHORITY_', 'CONTRACTING_BODY_'),  # CONTRACTING_BODY_CA_ACTIVITY_VALUE
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_CPV_', 'OBJECT_CONTRACT_'),  # OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_', 'OBJECT_CONTRACT_'),  # OBJECT_CONTRACT_TITLE
    ('OBJECT_CONTRACT_TITLE_CONTRACT', 'OBJECT_CONTRACT_OBJECT_DESCR_TITLE'),
    ('OBJECT_CONTRACT_SHORT_CONTRACT_DESCRIPTION', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_WITH_EXCLUDING_VAT_VALUE_CURRENCY', 'AWARDED_CONTRACT_VAL_SUBCONTRACTING_CURRENCY'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_WITH_EXCLUDING_VAT_VALUE_text', 'AWARDED_CONTRACT_VAL_SUBCONTRACTING_text'),

    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'OBJECT_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_HIGH'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_LOW'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_VALUE_COST', 'OBJECT_CONTRACT_VAL_TOTAL_text'),

    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_HIGH'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'AWARDED_CONTRACT_VALUES_VAL_RANGE_TOTAL_LOW'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_INITIAL_ESTIMATED_TOTAL_VALUE_CONTRACT_CURRENCY', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_CURRENCY'),
    ('AWARD_CONTRACT_CONTRACT_VALUE_INFORMATION_INITIAL_ESTIMATED_TOTAL_VALUE_CONTRACT_VALUE_COST', 'AWARDED_CONTRACT_VALUES_VAL_ESTIMATED_TOTAL_text'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_TYPE_OF_PROCEDURE_PT_NEGOTIATED_WITHOUT_PUBLICATION_CONTRACT_NOTICE_ANNEX_D_REASON_CONTRACT_LAWFUL', 'PROCEDURE_PT_AWARD_CONTRACT_WITHOUT_CALL_D_JUSTIFICATION'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CONTRACT_AWARD_INTERNET_ADDRESSES_CONTRACT_AWARD_URL_PARTICIPATE', 'CONTRACTING_BODY_URL_PARTICIPATION'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CONTRACT_AWARD_INTERNET_ADDRESSES_CONTRACT_AWARD_URL_GENERAL', 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_URL_GENERAL'),
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CONTRACT_AWARD_INTERNET_ADDRESSES_CONTRACT_AWARD_', 'CONTRACTING_BODY_'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_AWARD_CRITERIA_CONTRACT_AWARD_NOTICE_INFORMATION_AWARD_CRITERIA_DETAIL_MOST_ECONOMICALLY_ADVANTAGEOUS_TENDER_SHORT_CRITERIA_DEFINITION_', 'PROCEDURE_'),
    ('AWARD_CONTRACT_CONTRACT_TITLE', 'AWARD_CONTRACT_TITLE'),
    ('AWARD_CONTRACT_LOT_NUMBER', 'AWARD_CONTRACT_LOT_NO'),
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_ADDITIONAL_INFORMATION', 'COMPLEMENTARY_INFO_INFO_ADD'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_FILE_REFERENCE_NUMBER', 'OBJECT_CONTRACT_REFERENCE_NUMBER'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_OR_CONTRACTING_ENTITY_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_OR_CONTRACTING_ENTITY_AND_PURCHASING_ON_BEHALF_ACTIVITIES_OF_CONTRACTING_ENTITY_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_ACTIVITIES_OF_CONTRACTING_ENTITY_'),
    ('AWARD_CONTRACT_MORE_INFORMATION_TO_SUB_CONTRACTED_CONTRACT_LIKELY_SUB_CONTRACTED_WITH_', 'AWARD_CONTRACT_CONTRACT_LIKELY_SUB_CONTRACTED_'),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_ACTIVITIES_OF_CONTRACTING_ENTITY_', 'FD_CONTRACTING_ACTIVITIES_OF_CONTRACTING_ENTITY_'),
    ('OBJECT_CONTRACT_TYPE_CONTRACT_W_PUB_', 'OBJECT_CONTRACT_TYPE_CONTRACT_LOCATION_'),
    ('SERVICE_CATEGORY_PUB', 'SERVICE_CATEGORY'),
]
for pattern, replacement in pd18_award_rules:
    # Bind the pair as defaults so each lambda keeps its own rule.
    pd18 = pd18.rename(columns=lambda name, _p=pattern, _r=replacement: re.sub(_p, _r, name))

In [None]:
# Print a summary of pd18 (columns and dtypes) after the renaming cells.
pd18.info()

In [None]:
# Convert the pandas-on-Spark frame to a Spark DataFrame; fd18 is what the
# later write cell persists.
fd18 = pd18.to_spark()

In [None]:
"""
fd18 = fd18.withColumn("FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE", expr("make_date(`FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.YEAR`, `FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.MONTH`, `FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.DAY`)"))
fd18 = fd18.withColumn("FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.DAY`)"))
fd18 = fd18.withColumn("FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ.DAY`)"))
fd18 = fd18.withColumn("FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.DAY`)"))
fd18 = fd18.withColumn("FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ", expr("make_date(`FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ.YEAR`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ.MONTH`, `FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ.DAY`)"))
"""

In [None]:
#fd18.printSchema()

In [None]:
#pd18 = fd18.to_pandas_on_spark()

#pd18.info()

In [None]:
"""
pd18 = pd18.drop(columns=['FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.DAY', 'FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.MONTH', 'FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE.YEAR'])
pd18 = pd18.drop(columns=['FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.YEAR'])
pd18 = pd18.drop(columns=['FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ.YEAR'])
pd18 = pd18.drop(columns=['FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ.YEAR'])
pd18 = pd18.drop(columns=['FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ.DAY', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ.MONTH', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ.YEAR'])
"""

In [None]:
#pd18.info()

In [None]:
"""
pd18["FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE"]= ps.to_datetime(pd18["FD_AWARD_OF_CONTRACT_DEFENCE.CONTRACT_AWARD_DATE"])
pd18["FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd18["FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE"])
pd18["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ"]= ps.to_datetime(pd18["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.CNT_NOTICE_INFORMATION_F18.DATE_OJ"])
pd18["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ"]= ps.to_datetime(pd18["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.EX_ANTE_NOTICE_INFORMATION.DATE_OJ"])
pd18["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ"]= ps.to_datetime(pd18["FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_DEFENCE.ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_F18.PREVIOUS_PUBLICATION_EXISTS_F18.PREVIOUS_PUBLICATION_NOTICE_F18.DATE_OJ"])
"""

In [None]:
#pd18.info() 

In [None]:
#fd18 = pd18.to_spark()

In [None]:
#fd18.write.parquet("s3a_//falk2210/fd18_210921.parquet")

In [None]:
# Write fd18 as JSON under falk2210/f_award.json. mode("append") adds to any
# output already at that path, so re-running this cell writes the rows again.
fd18.write.mode("append").json("falk2210/f_award.json")

# 101 PRI 

<a id='d101' />

In [None]:
# Load every *.json file under falk/101.json into one Spark DataFrame.
d101 = spark.read.json("falk/101.json/*.json")
#d101.printSchema()

In [None]:
# Switch to the pandas-on-Spark API so the columns can be renamed with
# pandas-style calls.
# NOTE(review): to_pandas_on_spark() is deprecated in newer Spark releases in
# favour of pandas_api() -- confirm against the Spark version in use.
pd101 = d101.to_pandas_on_spark()

In [None]:
# Rename the version column first, before the generic 'DOFFIN_ESENDERS_'
# prefix is stripped in the next cell.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
pd101 = pd101.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd101.info()

In [None]:
# Remove the 'DOFFIN_ESENDERS_' and 'FORM_SECTION_' fragments from every
# column name.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
pd101 = pd101.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

pd101 = pd101.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd101.info()

In [None]:
# Remove the form-specific 'VOLUNTARY_PRIOR_INFORMATION_' fragment.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
pd101 = pd101.rename(columns=lambda x: re.sub('VOLUNTARY_PRIOR_INFORMATION_', '', x))
#pd101.info()

In [None]:
#pd101 = pd101.rename(columns=lambda x_ re.sub('_', '_', x))
#pd101.info()

In [None]:
# Strip a trailing '_P' from column names ('$' anchors the match at the end).
# Fixes: `lambda x_ re.sub(...)` was a SyntaxError, and '\_P$' relied on the
# invalid string escape '\_'; a raw string with a plain underscore matches
# the same text.
pd101 = pd101.rename(columns=lambda x: re.sub(r'_P$', '', x))
#pd101.info()

In [None]:
# Align the form-101 column names with the prior-information naming scheme.
# Every (pattern, replacement) pair is applied to each column name with
# re.sub, in the order listed.
#
# Fixes relative to the previous version of this cell:
#   * the lambdas were written `lambda x_ re.sub(...)`, which is a
#     SyntaxError -- the parameter list must be terminated by a colon;
#   * four rules replaced a prefix with the identical string
#     ('FD_AUTHORITY_NAME_ADDRESSES_CONTACT_', 'FD_OBJECT_', 'FD_INFO_',
#     'FD_AUTHORITY_NAME_ADDRESSES_CONTACT_INTERNET_ADDRESSES_'); they were
#     no-ops and have been removed.
pd101_rules = [
    ('_PRIOR_INFORMATION', ''),
    ('_OTH', ''),
    ('DOFFIN_APPENDIX_DOFFIN_FORM_TYPE_VOLUNTARY_REFERENCE_SECTION_REFERENCE_DOFFIN', 'DOFFIN_APPENDIX_DOFFIN_FORM_TYPE_NATIONAL_REFERENCE_SECTION_REFERENCE_DOFFIN'),
    ('FD_OBJECT_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_DAYS', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_DAYS'),
    ('FD_OBJECT_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_INTERVAL_DATE_'),
    ('FD_OBJECT_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_MONTHS', 'FD_OBJECT_WORKS_SCHEDULED_DATE_PERIOD_PERIOD_WORK_DATE_STARTING_MONTHS'),
    ('FD_OBJECT_SCHEDULED_DATE_PERIOD_PROCEDURE_DATE_STARTING_', 'FD_OBJECT_PROCEDURE_DATE_STARTING_'),
    ('FD_OBJECT_SITE_OR_LOCATION_LABEL', 'FD_OBJECT_TYPE_CONTRACT_PLACE_DELIVERY_SITE_OR_LOCATION_LABEL'),
]
for pattern, replacement in pd101_rules:
    # Bind the pair as defaults so each lambda keeps its own rule.
    pd101 = pd101.rename(columns=lambda name, _p=pattern, _r=replacement: re.sub(_p, _r, name))

In [None]:
# Print a summary of pd101 (columns and dtypes) after the renaming cells.
pd101.info()

In [None]:
# Convert the renamed pandas-on-Spark frame to a Spark DataFrame.
fd101 = pd101.to_spark()

In [None]:
# Stack `pri` and pd101 row-wise; ignore_index=True discards both original
# indexes and renumbers the result.
# NOTE(review): `pri` is not created in this part of the notebook --
# presumably an earlier cell defines it; confirm it is in scope.
pri2 = ps.concat([pri, pd101], ignore_index=True)

In [None]:
# Print a summary of the concatenated frame (columns and dtypes).
pri2.info()

In [None]:
# Build one DATE column per (target name, Spark SQL expression) pair; each
# expression calls make_date on the matching .YEAR / .MONTH / .DAY columns.
# NOTE(review): these names use dots and still contain '_OTH' and
# '_PRIOR_INFORMATION', whereas the rename cells above work on underscore
# names and strip those fragments -- confirm the columns exist in fd101.
fd101_date_columns = [
    ("FD_OTH_INFO_NOTICE_DISPATCH_DATE", "make_date(`FD_OTH_INFO_NOTICE_DISPATCH_DATE.YEAR`, `FD_OTH_INFO_NOTICE_DISPATCH_DATE.MONTH`, `FD_OTH_INFO_NOTICE_DISPATCH_DATE.DAY`)"),
    ("FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE", "make_date(`FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.YEAR`, `FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.MONTH`, `FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.DAY`)"),
    ("FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE", "make_date(`FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.YEAR`, `FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.MONTH`, `FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.DAY`)"),
    ("FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING", "make_date(`FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.YEAR`, `FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.MONTH`, `FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.DAY`)"),
]
for date_column, make_date_sql in fd101_date_columns:
    fd101 = fd101.withColumn(date_column, expr(make_date_sql))

In [None]:
# Show the Spark schema of fd101 after the date columns were added.
fd101.printSchema()

In [None]:
# Rebuild pd101 from fd101 so the pandas-on-Spark frame includes the new
# date columns, then print its summary.
pd101 = fd101.to_pandas_on_spark()

pd101.info()

In [None]:
# Drop the DAY / MONTH / YEAR component columns of the four dates.
# Fix: the PROCEDURE_DATE_STARTING components were dropped twice; the second
# call asked for columns that no longer existed (drop raises KeyError when
# none of the requested labels is present), so the duplicate is removed.
pd101 = pd101.drop(columns=['FD_OTH_INFO_NOTICE_DISPATCH_DATE.DAY', 'FD_OTH_INFO_NOTICE_DISPATCH_DATE.MONTH', 'FD_OTH_INFO_NOTICE_DISPATCH_DATE.YEAR'])
pd101 = pd101.drop(columns=['FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.DAY', 'FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.MONTH', 'FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE.YEAR'])
pd101 = pd101.drop(columns=['FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.DAY', 'FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.MONTH', 'FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE.YEAR'])
pd101 = pd101.drop(columns=['FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.DAY', 'FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.MONTH', 'FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING.YEAR'])

In [None]:
# Print a summary of pd101 after the date component columns were dropped.
pd101.info()

In [None]:
# Convert the four assembled date columns with ps.to_datetime.
# Fix: the PROCEDURE_DATE_STARTING column was converted twice in a row; the
# redundant second conversion is removed.
pd101_datetime_columns = [
    "FD_OTH_INFO_NOTICE_DISPATCH_DATE",
    "FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.END_DATE",
    "FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PERIOD_WORK_DATE_STARTING.INTERVAL_DATE.START_DATE",
    "FD_OBJECT_PRIOR_INFORMATION.SCHEDULED_DATE_PERIOD.PROCEDURE_DATE_STARTING",
]
for datetime_column in pd101_datetime_columns:
    pd101[datetime_column] = ps.to_datetime(pd101[datetime_column])

In [None]:
# Print a summary of pd101 after the datetime conversion.
pd101.info()

In [None]:
# Persist fd101 as Parquet in the falk2210 bucket.
# Fix: the URI was written "s3a_//...", which is not the s3a:// scheme and
# would be treated as a plain relative path instead of the S3A filesystem
# configured for this session.
# NOTE(review): this writes fd101, not pd101 -- the column drops and datetime
# conversions applied to pd101 in the preceding cells are not part of fd101.
# Confirm which frame is meant to be written.
fd101.write.parquet("s3a://falk2210/fd101_210921.parquet")

# 102 CONTRACT 

<a id='d102' />

In [None]:
# Load every *.json file under falk/102.json into one Spark DataFrame.
d102 = spark.read.json("falk/102.json/*.json")
#d102.printSchema()

In [None]:
# Switch to the pandas-on-Spark API so the columns can be renamed with
# pandas-style calls.
# NOTE(review): to_pandas_on_spark() is deprecated in newer Spark releases in
# favour of pandas_api() -- confirm against the Spark version in use.
pd102 = d102.to_pandas_on_spark()

In [None]:
# Rename the version column first, before the generic 'DOFFIN_ESENDERS_'
# prefix is stripped in the next cell.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
pd102 = pd102.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd102.info()

In [None]:
# Remove the 'DOFFIN_ESENDERS_' and 'FORM_SECTION_' fragments from every
# column name.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
pd102 = pd102.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

pd102 = pd102.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd102.info()

In [None]:
# Remove the form-specific 'VOLUNTARY_CONTRACT_' fragment.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
pd102 = pd102.rename(columns=lambda x: re.sub('VOLUNTARY_CONTRACT_', '', x))
#pd102.info()

In [None]:
# Merge each '..._P' variant column into its plain counterpart: the '_P'
# column is popped (removed from pd102) and handed to Series.update on the
# column of the same name without the suffix.
pd102_p_merges = (
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_ADDITIONAL_INFORMATION', 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_NOTICE_ADDITIONAL_INFORMATION_P'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONDITIONS_FOR_PARTICIPATION_F102', 'FD_OBJECT_CONTRACT_INFORMATION_CONDITIONS_FOR_PARTICIPATION_F102_P'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_SHORT_CONTRACT_DESCRIPTION', 'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_SHORT_CONTRACT_DESCRIPTION_P'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_SITE_OR_LOCATION_LABEL', 'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_SITE_OR_LOCATION_LABEL_P'),
    ('FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_TITLE_CONTRACT', 'FD_OBJECT_CONTRACT_INFORMATION_DESCRIPTION_CONTRACT_INFORMATION_TITLE_CONTRACT_P'),
    ('FD_OBJECT_CONTRACT_INFORMATION_NATURE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE', 'FD_OBJECT_CONTRACT_INFORMATION_NATURE_QUANTITY_SCOPE_TOTAL_QUANTITY_OR_SCOPE_P'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_FILE_REFERENCE_NUMBER', 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_FILE_REFERENCE_NUMBER_P'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_LANGUAGE_LANGUAGE_OTHER', 'FD_PROCEDURE_DEFINITION_CONTRACT_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_NOTICE_LANGUAGE_LANGUAGE_OTHER_P'),
)
for target_column, source_column in pd102_p_merges:
    # The target is looked up before the source is popped, as in the
    # original one-line statements.
    pd102[target_column].update(pd102.pop(source_column))

In [None]:
# Strip a trailing '_P' from the remaining column names ('$' anchors the
# match at the end), then print the resulting summary.
# Fixes: `lambda x_ re.sub(...)` was a SyntaxError, and '\_P$' relied on the
# invalid string escape '\_'; a raw string with a plain underscore matches
# the same text.
pd102 = pd102.rename(columns=lambda x: re.sub(r'_P$', '', x))
pd102.info()

In [None]:
# Convert the renamed frame to a Spark DataFrame and persist it as Parquet
# in the falk2210 bucket.
# Fix: the URI was written "s3a_//...", which is not the s3a:// scheme and
# would be treated as a plain relative path instead of the S3A filesystem
# configured for this session.
fd102 = pd102.to_spark()

fd102.write.parquet("s3a://falk2210/fd102_210921.parquet")

# 103 AWARD 

<a id='d103' />

In [None]:
# Load every *.json file under falk/103.json into one Spark DataFrame.
d103 = spark.read.json("falk/103.json/*.json")
#d103.printSchema()

In [None]:
# Switch to the pandas-on-Spark API, then rename the version column before
# the generic 'DOFFIN_ESENDERS_' prefix is stripped in the next cell.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
# NOTE(review): to_pandas_on_spark() is deprecated in newer Spark releases in
# favour of pandas_api() -- confirm against the Spark version in use.
pd103 = d103.to_pandas_on_spark()

pd103 = pd103.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_VERSION', 'DOFFIN_VERSION', x))
#pd103.info()

In [None]:
# Remove the 'DOFFIN_ESENDERS_' and 'FORM_SECTION_' fragments from every
# column name.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
pd103 = pd103.rename(columns=lambda x: re.sub('DOFFIN_ESENDERS_', '', x))

pd103 = pd103.rename(columns=lambda x: re.sub('FORM_SECTION_', '', x))
#pd103.info()

In [None]:
# Remove the form-specific 'VOLUNTARY_CONTRACT_AWARD_' fragment.
# Fix: `lambda x_ re.sub(...)` was a SyntaxError; the lambda needs a colon.
pd103 = pd103.rename(columns=lambda x: re.sub('VOLUNTARY_CONTRACT_AWARD_', '', x))
#pd103.info()

In [None]:
# Merge each '..._P' variant column into its plain counterpart: the '_P'
# column is popped (removed from pd103) and handed to Series.update on the
# column of the same name without the suffix.
pd103_p_merges = (
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_SHORT_CONTRACT_DESCRIPTION', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_SHORT_CONTRACT_DESCRIPTION_P'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_SITE_OR_LOCATION_LABEL', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_SITE_OR_LOCATION_LABEL_P'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_TITLE_CONTRACT', 'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_TITLE_CONTRACT_P'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_AWARD_FILE_REFERENCE_NUMBER', 'FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_AWARD_FILE_REFERENCE_NUMBER_P'),
)
for target_column, source_column in pd103_p_merges:
    # The target is looked up before the source is popped, as in the
    # original one-line statements.
    pd103[target_column].update(pd103.pop(source_column))

In [None]:
# Strip the form-03-specific fragments from the pd103 column names.
# Every (pattern, replacement) pair is applied to each column name with
# re.sub, in the order listed.
#
# Fixes relative to the previous version of this cell:
#   * the lambdas were written `lambda x_ re.sub(...)`, which is a
#     SyntaxError -- the parameter list must be terminated by a colon;
#   * '\_P$' relied on the invalid string escape '\_'; the raw string
#     r'_P$' matches the same text;
#   * three rules whose patterns ended in '_F03'
#     ('..._PREVIOUS_PUBLICATION_EXISTS_F03', 'PREVIOUS_PUBLICATION_NOTICE_F03',
#     'PREVIOUS_NOTICE_BUYER_PROFILE_F03') have been removed: the generic
#     '_F03' rule runs before them, so they could never match;
#   * the rule that replaced
#     'FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_'
#     with the identical string was a no-op and has been removed.
pd103_cleanup_rules = [
    (r'_P$', ''),
    ('F03_', ''),
    ('_F03', ''),
    ('CONTACT_DATA_WITHOUT_RESPONSIBLE_NAME_', ''),
    ('DOFFIN_FORM_TYPE_', ''),
    ('ADMINISTRATIVE_INFORMATION_PREVIOUS_PUBLICATION_INFORMATION_NOTICE_', ''),
    ('ORGANISATION_', ''),
    ('AUTHORITY_INFORMATION_', ''),
    ('E_MAILS_', ''),
    ('FD_CONTRACTING_CONTRACT_AWARD_NAME_ADDRESSES_CONTACT_CONTRACT_AWARD_CA_CE_CONCESSIONAIRE_PROFILE_', 'FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_'),
]
for pattern, replacement in pd103_cleanup_rules:
    # Bind the pair as defaults so each lambda keeps its own rule.
    pd103 = pd103.rename(columns=lambda name, _p=pattern, _r=replacement: re.sub(_p, _r, name))

In [None]:
# Map the form-103 column names onto the common award-notice naming scheme.
# Every (pattern, replacement) pair is applied to each column name with
# re.sub, strictly in the order listed: more specific patterns must be
# rewritten before a shorter prefix of them is.
#
# Fixes relative to the previous version of this cell:
#   * the lambdas were written `lambda x_ re.sub(...)`, which is a
#     SyntaxError -- the parameter list must be terminated by a colon;
#   * two further rules for 'FD_AWARD_OF_CONTRACT_'
#     (-> 'OBJECT_CONTRACT_OBJECT_DESCR_' and -> 'OBJECT_CONTRACT_') have been
#     removed: the rule 'FD_AWARD_OF_CONTRACT_' -> 'AWARD_CONTRACT_' near the
#     top already rewrites every occurrence, so they could never match.
#
# Target prefixes that still have no source pattern mapped (they were
# commented-out placeholders with an empty pattern):
#   COMPLEMENTARY_INFO_ADDRESS_MEDIATION_BODY_, COMPLEMENTARY_INFO_ADDRESS_REVIEW_BODY_,
#   CONTRACTING_BODY_ADDRESS_FURTHER_INFO_, LEFTI_, OBJECT_CONTRACT_OBJECT_DESCR_,
#   PROCEDURE_, CONTRACTING_BODY_URL_DOCUMENT, CONTRACTING_BODY_URL_TOOL
pd103_award_rules = [
    ('FD_AWARD_OF_CONTRACT_ECONOMIC_OPERATOR_NAME_ADDRESS_', 'AWARDED_CONTRACT_CONTRACTOR_ADDRESS_CONTRACTOR_'),
    ('FD_AWARD_OF_CONTRACT_', 'AWARD_CONTRACT_'),  # ITEM
    ('FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_NOTICE_DISPATCH_DATE_', 'COMPLEMENTARY_INFO_'),  # COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE
    ('FD_CONTRACTING_NAME_ADDRESSES_CONTACT_CA_CE_CONCESSIONAIRE_PROFILE_', 'CONTRACTING_BODY_ADDRESS_CONTRACTING_BODY_'),
    ('FD_CONTRACTING_CONTRACT_AWARD_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_PURCHASING_ON_BEHALF_YES_CONTACT_DATA_OTHER_BEHALF_CONTRACTING_AUTORITHY_', 'CONTRACTING_BODY_ADDRESS_PARTICIPATION_'),
    ('FD_CONTRACTING_CONTRACT_AWARD_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_TYPE_OF_CONTRACTING_AUTHORITY_OTHER_', 'CONTRACTING_BODY_'),  # CONTRACTING_BODY_CA_ACTIVITY_VALUE
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_CPV_', 'OBJECT_CONTRACT_'),  # OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_TITLE_CONTRACT', 'OBJECT_CONTRACT_OBJECT_DESCR_TITLE'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_DESCRIPTION_AWARD_NOTICE_INFORMATION_SHORT_CONTRACT_DESCRIPTION', 'OBJECT_CONTRACT_OBJECT_DESCR_SHORT_DESCR'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_CURRENCY', 'OBJECT_CONTRACT_VAL_TOTAL_CURRENCY'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_HIGH_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_HIGH'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_RANGE_VALUE_COST_LOW_VALUE', 'OBJECT_CONTRACT_VAL_RANGE_TOTAL_LOW'),
    ('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_VALUE_COST', 'OBJECT_CONTRACT_VAL_TOTAL_text'),
    ('FD_CONTRACTING_CONTRACT_AWARD_NAME_ADDRESSES_CONTACT_CONTRACT_AWARD_INTERNET_ADDRESSES_CONTRACT_AWARD_URL_PARTICIPATE', 'CONTRACTING_BODY_URL_PARTICIPATION'),
    ('FD_CONTRACTING_CONTRACT_AWARD_NAME_ADDRESSES_CONTACT_CONTRACT_AWARD_INTERNET_ADDRESSES_CONTRACT_AWARD_URL_GENERAL', 'CONTRACTING_BODY_ADDRESS_FURTHER_INFO_URL_GENERAL'),
]
for pattern, replacement in pd103_award_rules:
    # Bind the pair as defaults so each lambda keeps its own rule.
    pd103 = pd103.rename(columns=lambda name, _p=pattern, _r=replacement: re.sub(_p, _r, name))
#pd103 = pd103.rename(columns=lambda x_ re.sub('', 'PROCEDURE_URL_NATIONAL_PROCEDURE', x))
pd103 = pd103.rename(columns=lambda x_ re.sub('FD_CONTRACTING_CONTRACT_AWARD_NAME_ADDRESSES_CONTACT_CONTRACT_AWARD_INTERNET_ADDRESSES_CONTRACT_AWARD_', 'CONTRACTING_BODY_', x))
pd103 = pd103.rename(columns=lambda x_ re.sub('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_AWARD_CRITERIA_CONTRACT_AWARD_NOTICE_INFORMATION_AWARD_CRITERIA_DETAIL_MOST_ECONOMICALLY_ADVANTAGEOUS_TENDER_SHORT_CRITERIA_DEFINITION_', 'PROCEDURE_', x))
pd103 = pd103.rename(columns=lambda x_ re.sub('AWARD_CONTRACT_CONTRACT_TITLE', 'AWARD_CONTRACT_TITLE', x))
#pd103 = pd103.rename(columns=lambda x_ re.sub('', 'AWARD_CONTRACT_LOT_NO', x)) 
pd103 = pd103.rename(columns=lambda x_ re.sub('FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_ADDITIONAL_INFORMATION', 'COMPLEMENTARY_INFO_INFO_ADD', x)) 
pd103 = pd103.rename(columns=lambda x_ re.sub('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_ADMINISTRATIVE_INFORMATION_CONTRACT_AWARD_FILE_REFERENCE_NUMBER', 'OBJECT_CONTRACT_REFERENCE_NUMBER', x))
pd103 = pd103.rename(columns=lambda x_ re.sub('FD_CONTRACTING_CONTRACT_AWARD_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_', 'FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_', x))

In [None]:
# Print a summary of pd103 (column names, non-null counts, dtypes) to check
# the column names produced by the renames.
pd103.info()

In [None]:
#pd103.head()

In [None]:
# Convert the pandas-on-Spark frame into a plain Spark DataFrame; fd103 is
# what gets written out further down.
fd103 = pd103.to_spark()

In [None]:
"""
fd103 = fd103.withColumn("FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE", expr("make_date(`FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.YEAR`, `FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.MONTH`, `FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.DAY`)"))
fd103 = fd103.withColumn("FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE", expr("make_date(`FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.YEAR`, `FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.MONTH`, `FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.DAY`)"))
"""

In [None]:
#fd103.printSchema()

In [None]:
#pd103 = fd103.to_pandas_on_spark()

#pd103.info()

In [None]:
"""
pd103 = pd103.drop(columns=['FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.DAY', 'FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.MONTH', 'FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE.YEAR'])
pd103 = pd103.drop(columns=['FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.DAY', 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.MONTH', 'FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE.YEAR'])
"""

In [None]:
#pd103.info()

In [None]:
"""
pd103["FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE"]= ps.to_datetime(pd103["FD_AWARD_OF_CONTRACT.CONTRACT_AWARD_DATE"])
pd103["FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE"]= ps.to_datetime(pd103["FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD.NOTICE_DISPATCH_DATE"])
"""

In [None]:
#pd103.info() 

In [None]:
#fd103 = pd103.to_spark()

In [None]:
#fd103.write.parquet("s3a://falk2210/fd103_210921.parquet")

In [None]:
# Append fd103 as JSON part files under falk2210/f_award.json.
# NOTE: the save mode is "append", so re-running this cell writes the same
# rows a second time.
fd103.write.mode("append").json("falk2210/f_award.json")

# read all awards

In [None]:
# Load every JSON part file found under falk2210/f_award.json into one frame.
f_award = spark.read.json("falk2210/f_award.json/*.json")

In [None]:
# Append the loaded rows to falk2210/alla_award.json.
# NOTE: "append" mode again: each re-run adds the same rows once more.
f_award.write.mode("append").json("falk2210/alla_award.json")

In [None]:
# Rebind f_award to the contents of falk2210/alla_award.json (all part files).
f_award = spark.read.json("falk2210/alla_award.json/*.json")

In [None]:
# Build one DATE column per (year, month, day) column triple.
# Key: name of the date column to add.
# Value: common prefix of its `<prefix>_YEAR`, `<prefix>_MONTH` and
# `<prefix>_DAY` source columns.
_date_part_prefix_by_target = {
    "AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT": "AWARD_CONTRACT_CONTRACT_AWARD_DATE",
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE": "COMPLEMENTARY_INFO",
    "FD_PREVIOUS_PUBLICATION_EXISTS_CNT_NOTICE_INFORMATION_DATE_OJ": "FD_PREVIOUS_PUBLICATION_EXISTS_CNT_NOTICE_INFORMATION_DATE_OJ",
    "FD_PREVIOUS_PUBLICATION_EXISTS_DATE_OJ": "FD_PREVIOUS_PUBLICATION_EXISTS_DATE_OJ",
    "FD_PREVIOUS_PUBLICATION_EXISTS_EX_ANTE_NOTICE_INFORMATION_DATE_OJ": "FD_PREVIOUS_PUBLICATION_EXISTS_EX_ANTE_NOTICE_INFORMATION_DATE_OJ",
    "FD_PREVIOUS_PUBLICATION_EXISTS_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ": "FD_PREVIOUS_PUBLICATION_EXISTS_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ",
    "FD_PREVIOUS_PUBLICATION_EXISTS_PREVIOUS_PUBLICATION_NOTICE_DATE_OJ": "FD_PREVIOUS_PUBLICATION_EXISTS_PREVIOUS_PUBLICATION_NOTICE_DATE_OJ",
}

for _target, _prefix in _date_part_prefix_by_target.items():
    # Backticks quote the column names inside the SQL expression.
    _make_date_sql = f"make_date(`{_prefix}_YEAR`, `{_prefix}_MONTH`, `{_prefix}_DAY`)"
    f_award = f_award.withColumn(_target, expr(_make_date_sql))


In [None]:
#f_award.write.parquet("falk2210/f_award.parquet")

In [None]:
#f_award.printSchema()

In [None]:
# Switch to the pandas-on-Spark API; the cells below drop, convert and rename
# columns on pf_award using pandas-style calls.
# NOTE(review): to_pandas_on_spark() is reported as deprecated in newer Spark
# releases in favour of pandas_api(); confirm against the installed version.
pf_award = f_award.to_pandas_on_spark()

In [None]:
# Remove the DAY / MONTH / YEAR part columns, one triple per drop call.
_date_part_prefixes = (
    'AWARD_CONTRACT_CONTRACT_AWARD_DATE',
    'COMPLEMENTARY_INFO',
    'FD_PREVIOUS_PUBLICATION_EXISTS_CNT_NOTICE_INFORMATION_DATE_OJ',
    'FD_PREVIOUS_PUBLICATION_EXISTS_DATE_OJ',
    'FD_PREVIOUS_PUBLICATION_EXISTS_EX_ANTE_NOTICE_INFORMATION_DATE_OJ',
    'FD_PREVIOUS_PUBLICATION_EXISTS_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ',
    'FD_PREVIOUS_PUBLICATION_EXISTS_PREVIOUS_PUBLICATION_NOTICE_DATE_OJ',
)

for _prefix in _date_part_prefixes:
    _part_columns = [f'{_prefix}_DAY', f'{_prefix}_MONTH', f'{_prefix}_YEAR']
    pf_award = pf_award.drop(columns=_part_columns)

In [None]:
# Convert each listed column of pf_award with ps.to_datetime, in this order.
_pf_award_datetime_columns = [
    "DOFFIN_APPENDIX_NATIONAL_REFERENCE_SECTION_REFERENCE_DOFFIN_DATE",
    "DOFFIN_APPENDIX_VOLUNTARY_REFERENCE_SECTION_REFERENCE_DOFFIN_DATE",
    "AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT",
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "FD_PREVIOUS_PUBLICATION_EXISTS_CNT_NOTICE_INFORMATION_DATE_OJ",
    "FD_PREVIOUS_PUBLICATION_EXISTS_DATE_OJ",
    "FD_PREVIOUS_PUBLICATION_EXISTS_EX_ANTE_NOTICE_INFORMATION_DATE_OJ",
    "FD_PREVIOUS_PUBLICATION_EXISTS_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_DATE_OJ",
    "FD_PREVIOUS_PUBLICATION_EXISTS_PREVIOUS_PUBLICATION_NOTICE_DATE_OJ",
    "AWARDED_CONTRACT_DATE_DECISION_JURY",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
    "PROCEDURE_DATE_AWARD_SCHEDULED",
]

for _column in _pf_award_datetime_columns:
    pf_award[_column] = ps.to_datetime(pf_award[_column])

In [None]:
# Fold variant-named columns into one target column each: pop() removes the
# source column from pf_award and returns it, and Series.update() then
# overwrites the target in place with the source's non-null values.
pf_award['AWARD_CONTRACT_CONTRACT_NO'].update(pf_award.pop('AWARD_CONTRACT_CONTRACT_NUMBER'))
pf_award['FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION'].update(pf_award.pop('FD_COMPLEMENTARY_INFORMATION_CONTRACT_AWARD_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION'))
# NOTE(review): the next statement pops the very column it updates (target and
# source names are identical). This looks like a copy/paste slip; confirm the
# intended source column and check whether the target column, just filled by
# the statement above, still exists in pf_award afterwards.
pf_award['FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION'].update(pf_award.pop('FD_COMPLEMENTARY_INFORMATION_PROCEDURES_FOR_APPEAL_LODGING_OF_APPEALS_LODGING_OF_APPEALS_PRECISION'))
pf_award['FD_OBJECT_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_INCLUDING_VAT_VAT_PRCT'].update(pf_award.pop('FD_OBJECT_CONTRACT_INFORMATION_CONTRACT_AWARD_NOTICE_TOTAL_FINAL_VALUE_COSTS_RANGE_AND_CURRENCY_WITH_VAT_RATE_INCLUDING_VAT_VAT_PRCT'))
pf_award['OBJECT_CONTRACT_CPV_ADDITIONAL_CPV_CODE_CODE'].update(pf_award.pop('OBJECT_CONTRACT_CPV_CPV_ADDITIONAL_CPV_CODE_CODE'))
pf_award['OBJECT_CONTRACT_CPV_MAIN_CPV_CODE_CODE'].update(pf_award.pop('OBJECT_CONTRACT_CPV_CPV_MAIN_CPV_CODE_CODE'))

In [None]:
# Column-name shortening rules for pf_award, applied to every column name in
# the order listed: each rule sees the output of the previous one. Order
# matters, e.g. 'AWARDED_CONTRACT_' must be stripped before 'AWARD_CONTRACT_'
# becomes 'AC_', and the 'AC_CONTRACT_...' rule relies on that 'AC_' rewrite.
#
# Fix: the statements this replaces were written as `lambda x_ re.sub(...)`,
# which is a SyntaxError (the `:` after the lambda parameter was missing), so
# the cell could not run at all.
#
# All patterns are plain literals (letters, digits, underscores), so
# str.replace gives the same result as re.sub would. Matching is by substring
# anywhere in the name, not only at the start.
_PF_AWARD_RENAME_RULES = [
    ('AWARDED_CONTRACT_', ''),
    ('PROCUREMENT_DISCONTINUED_', ''),
    ('CONTRACT_VALUE_INFORMATION_', ''),
    ('COMPLEMENTARY_INFO_', ''),
    ('CONTRACTING_BODY_', ''),
    ('OBJECT_CONTRACT_', 'OC_'),
    ('AWARD_CONTRACT_', 'AC_'),
    # Collapses the doubled 'SUBCONTRACT_SUBCONTRACT_' into a single one.
    ('AC_CONTRACT_LIKELY_SUB_CONTRACTED_SUBCONTRACT_SUBCONTRACT_AWARD_PART_VALUE', 'AC_CONTRACT_LIKELY_SUB_CONTRACTED_SUBCONTRACT_AWARD_PART_VALUE'),
    ('INCLUDING_VAT', 'INC_VAT'),
    ('DOFFIN_APPENDIX', 'DA'),
    # NOTE(review): 'PT_' is removed wherever it occurs, including inside
    # longer words: any name containing 'RECEIPT_' loses its 'PT_'
    # ('..._RECEIPT_TENDERS' becomes '..._RECEITENDERS'). Left unchanged so the
    # resulting column names stay as they were; confirm whether this is intended.
    ('PT_', ''),
    ('_FORM_EXTENSIONS', ''),
    ('REFERENCE_SECTION_', ''),
    ('FD_CONTRACTING_TYPE_AND_ACTIVITIES_AND_PURCHASING_ON_BEHALF_TYPE_AND_ACTIVITIES_', 'PURCHASING_ON_BEHALF_'),
    ('FD_PREVIOUS_PUBLICATION_EXISTS_', 'PREVIOUS_PUBLICATION_'),
    ('FD_PROCEDURES_AWARD_CRITERIA_CONTRACT_UTILITIES_INFORMATION_PRICE_AWARD_CRITERIA_PRICE', 'PRICE_AWARD_CRITERIA_PRICE'),
    ('FD_PROCEDURE_DEFINITION_', ''),
    ('VAL_BARGAIN_PURCHASE_', 'VALUES_VAL_BARGAIN_PURCHASE_'),
    ('VAL_SUBCONTRACTING_', 'VALUES_VAL_SUBCONTRACTING_'),
    ('CONTRACT_AWARD_EXTENSIONS_NATIONAL_ANNEXD4_', 'NATIONAL_'),
    ('VOLUNTARY_EX_ANTE_TRANSPARENCY_NOTICE_EXTENSIONS_NATIONAL_ANNEXD4_', 'V_NATIONAL_'),
    ('FD_CONTRACTING_ACTIVITIES_OF_CONTRACTING_ENTITY_', ''),
    ('CNT_NOTICE_INFORMATION_S_', 'CNT_'),
    # NOTE(review): unlike 'CNT_' above, this replacement has no trailing
    # underscore, so it fuses with the next name segment. Left unchanged;
    # confirm whether 'EX_ANTE_' was meant.
    ('EX_ANTE_NOTICE_INFORMATION_S_', 'EX_ANTE'),
    ('TYPE_OF_PROCEDUREAWARD_WITHOUT_PRIOR_PUBLICATION_', 'WITHOUT_PRIOR_'),
    ('TYPE_OF_PROCEDURENEGOTIATED_WITHOUT_COMPETITION_', 'WITHOUT_COMPETITION_'),
]


def _rename_pf_award_column(name):
    """Return *name* after applying every rule in _PF_AWARD_RENAME_RULES in order."""
    for old, new in _PF_AWARD_RENAME_RULES:
        name = name.replace(old, new)
    return name


# One rename pass over the columns instead of one new frame per rule.
pf_award = pf_award.rename(columns=_rename_pf_award_column)

In [None]:
# Print a summary of pf_award (column names, non-null counts, dtypes) to
# check the shortened column names.
pf_award.info()

In [None]:
# Convert pf_award to a plain Spark DataFrame.
# NOTE(review): in the cells visible here, this conversion happens before the
# second batch of pf_award renames further down, so spf_award carries the
# column names as of this point; confirm that is intended.
spf_award = pf_award.to_spark()

In [None]:
#spf_award.write.parquet("falk2210/spf_award041221.parquet")

In [None]:
# Count the distinct rows; count() is an action, so this runs a Spark job.
spf_award.distinct().count()

In [None]:
# Second batch of column-name rewrites for pf_award, applied to every column
# name in the order listed (each rule sees the output of the previous one).
#
# Fix: the statements this replaces were written as `lambda x_ re.sub(...)`,
# which is a SyntaxError (the `:` after the lambda parameter was missing), so
# the cell could not run at all.
#
# All patterns are plain literals (letters, digits, underscores), so
# str.replace gives the same result as re.sub would.
_PF_AWARD_FOLLOWUP_RENAME_RULES = [
    ('PREVIOUS_PUBLICATION_OTHER_PREVIOUS_PUBLICATIONS_OTHER_PREVIOUS_PUBLICATION_', 'OTHER_PREVIOUS_PUBLICATION_'),
    ('PREVIOUS_PUBLICATION_PREVIOUS_PUBLICATION_NOTICE_', 'PREVIOUS_PUBLICATION_'),
    # ('TYPE_OF_PROCEDUREAWARD_WITHOUT_PRIOR_PUBLICATION_', 'WITHOUT_PRIOR_'),
    # NOTE(review): the remaining rules look unreachable when the notebook is
    # run top to bottom, because the earlier pf_award rename cell already
    # applies the same TYPE_OF_PROCEDURENEGOTIATED rule, strips
    # 'FD_PROCEDURE_DEFINITION_' and rewrites 'AWARD_CONTRACT_' to 'AC_'.
    # They are kept unchanged; confirm the intended cell order.
    ('TYPE_OF_PROCEDURENEGOTIATED_WITHOUT_COMPETITION_', 'WITHOUT_COMPETITION_'),
    ('FD_PROCEDURE_DEFINITION_CONTRACT_AWARD_NOTICE_', 'PROCEDURE_'),
    ('PROCEDURE_DIRECTIVE_2014_24_EU_NEGOTIATED_WITHOUT_PUBLICATION_D_ACCORDANCE_ARTICLE_', 'WITHOUT_PRIOR_'),
    ('PROCEDURE_AWARD_CONTRACT_WITHOUT_PUBLICATION_', 'WITHOUT_PRIOR_'),
]


def _rename_pf_award_followup_column(name):
    """Return *name* after applying every rule in _PF_AWARD_FOLLOWUP_RENAME_RULES in order."""
    for old, new in _PF_AWARD_FOLLOWUP_RENAME_RULES:
        name = name.replace(old, new)
    return name


# One rename pass over the columns instead of one new frame per rule.
pf_award = pf_award.rename(columns=_rename_pf_award_followup_column)

In [None]:
# Print the pf_award summary again to check the second batch of renames.
pf_award.info()

In [None]:
#p_awa["COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE"]

In [None]:
# Convert each listed column of p_awa with ps.to_datetime, in this order.
# Commented-out names are conversions that are currently disabled.
# NOTE(review): p_awa is not created in this part of the notebook; confirm it
# is defined before this cell runs.
_p_awa_datetime_columns = [
    "AWARDED_CONTRACT_DATE_CONCLUSION_CONTRACT",
    "AWARDED_CONTRACT_DATE_DECISION_JURY",
    # "AWARD_CONTRACT_NO_AWARDED_CONTRACT_PROCUREMENT_DISCONTINUED_DATE_DISPATCH_ORIGINAL_PUBLICATION",
    # "AWARD_CONTRACT_NO_AWARDED_CONTRACT_PROCUREMENT_DISCONTINUED_DATE_DISPATCH_ORIGINAL_text",
    "COMPLEMENTARY_INFO_DATE_DISPATCH_NOTICE",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_END",
    "OBJECT_CONTRACT_OBJECT_DESCR_DATE_START",
    "PROCEDURE_DATE_RECEIPT_TENDERS",
    "PROCEDURE_DATE_AWARD_SCHEDULED",
]

for _column in _p_awa_datetime_columns:
    p_awa[_column] = ps.to_datetime(p_awa[_column])