# Sync Police Sandbox
## Import CSV files
CSV files from the __backend__, __Odoo__ and __planday__ can be exported manually. Let's see if we can export them automatically.

In [6]:
from pandas import read_csv
from os.path import join

# Directory that holds the exported CSV files read below (odoo.csv, planday.csv, backend.csv).
# NOTE(review): absolute, machine-specific path — adjust before running on another machine.
csv_dir = '/Users/loicjounot/Code/internal-tools/sync_police/sandbox/drivers/'

In [7]:
# Load the Odoo export with read_csv's default settings and display it.
odoo_filepath = join(csv_dir, 'odoo.csv')
odoo_df = read_csv(odoo_filepath)
odoo_df

Unnamed: 0,External ID,Active,Planday username,Backend UUID,Job Position,Salary ID,Driver App username,Name,Fleet/Name
0,__export__.res_partner_2303,True,,6f1596ed-cc2d-4f53-b2b1-405e830285b9,Driver,279198,aftabs,Aftab Shafqat,birmingham
1,__export__.res_partner_3010,True,,86638e4b-8646-44f8-a4b7-ca1cb6d0d094,Valk Driver Leicester,284845,aaka,Aamir Karim,leicester
2,__export__.res_partner_1877,True,,f14b717a-c873-4550-af43-61a590542502,Driver,274578,AaYa,Aamir Yaqoob,london_zone_20
3,__export__.res_partner_3749,True,,,Driver,,Ajohnson,Aaron Johnson,manchester
4,__export__.res_partner_989,True,,aa7d7e68-5b27-40b5-9e86-c5245fe8cc39,Driver,240035,apervaiz,Aasim Pervaiz,birmingham
5,__export__.res_partner_623,True,,92a58abf-c431-41d1-a144-9724b8ad6e0b,Driver,227979,sabbas,"Abbas, Sam",nottingham
6,__export__.res_partner_3116,True,,a42d1cf3-1c7a-4019-bf8d-217f4271065c,Driver,286568,azeboudj,Abdel Malek Zeboudj,london_zone_j
7,__export__.res_partner_1432,True,,5e44a4b3-6abe-40ca-a08d-c15553fa7e88,Driver,268112,ahassan3,Abdihamid Hassan,
8,__export__.res_partner_1143,True,,5f922c71-9f5e-49b4-8af2-d121578bd833,Driver,259419,aahmed,Abdul Ahmed,birmingham
9,__export__.res_partner_1227,True,,3c0036ff-5b3d-45fa-8400-f44de120f112,Driver,261550,abdulali,Abdul Aziz Ali,birmingham


In [37]:
# The Planday export is semicolon-separated and has no header row, so the
# columns get positional integer labels (header=None).
# The file starts with a UTF-8 byte-order mark, which otherwise ends up glued
# to the first value of the first row; the 'utf-8-sig' codec strips it.
planday_filepath = join(csv_dir, 'planday.csv')
planday_df = read_csv(planday_filepath, sep=';', header=None, encoding='utf-8-sig')
planday_df

Unnamed: 0,0,1,2,3,4,5,6
0,﻿__test driver,360,_testdriver360,288542,,,
1,__test driver 999,__test,___test3333,280585,,,
2,__test1111,11,_test011,276535,,,
3,_testdriver77,.,_testdriver77,274861,,,
4,1,m,ahabibani,280916,,,
5,Abdallah,Sina,abdallahsina,259090,Nottingham,,
6,Abdel Malek,Zeboudji,AZeboudji,286568,,,
7,Abdelrazig,Sanousi,asanousi1,286661,Birmingham,,
8,Abdihamid,Hassan,abdihamid,268112,Birmingham,,
9,Abdul,Kuddoos,abdul.kuddoos,264630,birmingham,,


In [5]:
# Load the backend export; 'created_at' and 'deleted_at' are parsed as dates.
# Only the first rows are displayed.
backend_filepath = join(csv_dir, 'backend.csv')
backend_df = read_csv(backend_filepath, sep=',', parse_dates=['created_at', 'deleted_at'])
backend_df.head()

NameError: name 'join' is not defined

It looks like the __planday__ file is missing column descriptors.

## Checking data synchronization
This is a "thought experiment" implementing fake data. The goal is to synchronize the above three data tables: __planday__, the __backend__ and __odoo__ (which acts as the mapper between __planday__ and the __backend__). I try to encompass most of the different ways that data can be missing.

### Summary of the fake data
* driver __0__ has been entered everywhere and has no empty fields
* driver __1__ has been entered everywhere but has an __empty field in planday__
* driver __2__ has been entered everywhere but has an __empty field in the backend__
* driver __3__ has been entered everywhere but has an __empty field in odoo__
* driver __4__ has been mapped in odoo but __no matching ID can be found in the backend__
* driver __5__ has been mapped in odoo but __no matching ID can be found in planday__
* driver __6__ has been entered in the backend and planday but __not in odoo__
* driver __7__ has been entered in the __backend only__
* driver __8__ has been entered in __planday only__
* driver __9__ has been entered in __odoo only__
* driver __10__ has been entered in odoo with a planday ID but without a backend ID
* driver __11__ has been entered in odoo but the planday

In [None]:
from pandas import DataFrame

# Hand-written fake tables, one per system. The number in each id is the
# driver number used in the summary above.
# Angle-bracket values ('<BACKEND>', '<PLANDAY>', '<ODOO>', '<ROSETTA>') are
# placeholders — presumably marking the field that is missing for that driver.

# Backend: drivers 0-3 and 5-7.
df_backend = DataFrame(dict(
    id_backend=['b0', 'b1', 'b2', 'b3', 'b5', 'b6', 'b7'],
    info_backend=['toast', 'sausage', '<BACKEND>', 'beans', 'eggs', 'bacon', 'coffee'],
))

# Planday: drivers 0-4, 6 and 8.
df_planday = DataFrame(dict(
    id_planday=['p0', 'p1', 'p2', 'p3', 'p4', 'p6', 'p8'],
    info_planday=['star', '<PLANDAY>', 'wars', 'darth', 'vador', 'jedi', 'leia'],
))

# Odoo: drivers 0-5 and 9, each row carrying the ids it maps to in the two
# other systems.
df_odoo = DataFrame(dict(
    id_odoo=['o0', 'o1', 'o2', 'o3', 'o4', 'o5', 'o9'],
    id_backend_in_odoo=['b0', 'b1', 'b2', 'b3', '<ROSETTA>', 'b5', '<BACKEND>'],
    id_planday_in_odoo=['p0', 'p1', 'p2', 'p3', 'p4', '<ROSETTA>', '<PLANDAY>'],
    info_odoo=['uk', 'usa', 'france', '<ODOO>', 'china', 'iceland', 'Egypt'],
))

## Creating better fake data
The above is difficult to keep consistent, so I rationalised the whole thing and put it in one single table.

In [None]:
from pandas import DataFrame, read_sql_table
from dataset import connect
from sqlalchemy import create_engine

# Two handles on the same SQLite file: a SQLAlchemy engine (passed to
# read_sql_table below) and a `dataset` connection (used for load_table).
engine = create_engine('sqlite:///../../tests/assets/sync_police_test_cases.sqlite')
db = connect ('sqlite:///../../tests/assets/sync_police_test_cases.sqlite')

In [None]:
db.load_table('test_cases_v1')

In [None]:
tests_df = read_sql_table('test_cases_v1', engine)
tests_df

In [None]:
from numpy import nan

# Backend view of the test cases: blank strings become NaN, then only rows
# with at least two non-missing values among the three columns are kept.
df_backend = (
    tests_df[['id_backend', 'info_backend', 'description']]
    .replace('', nan)
    .dropna(thresh=2)
)

### Backend table

In [None]:
df_backend

## Planday table

In [None]:
# Planday view of the test cases: blank strings become NaN, then only rows
# with at least two non-missing values among the three columns are kept.
df_planday = (
    tests_df[['id_planday', 'info_planday', 'description']]
    .replace('', nan)
    .dropna(thresh=2)
)
df_planday

## Odoo table

In [None]:
# Odoo view of the test cases: blank strings become NaN, then only rows with
# at least four non-missing values among the five columns are kept.
df_odoo = (
    tests_df[['id_odoo', 'info_odoo', 'id_planday_in_odoo', 'id_backend_in_odoo', 'description']]
    .replace('', nan)
    .dropna(thresh=4)
)
df_odoo

In [None]:
from pandas import merge

## Join Odoo with the backend
### Outer join on the odoo and backend tables.

In [None]:
from numpy import nan

# Outer join: keep backend rows without an Odoo mapping and Odoo rows without
# a backend match. Missing values are temporarily filled with '' because
# concatenating a string with NaN would turn the whole description into NaN.
df_backend_with_odoo = merge(
    left=df_backend,
    right=df_odoo,
    left_on='id_backend',
    right_on='id_backend_in_odoo',
    how='outer'
).fillna('')

# Both inputs carry a 'description' column, so merge suffixes them _x / _y;
# fold them back into a single column.
df_backend_with_odoo['description'] = df_backend_with_odoo['description_x'] + ' | ' + df_backend_with_odoo['description_y']
#df_backend_with_odoo['description'] = df_backend_with_odoo['description'].map(lambda x: x.split(' |')[0])

# Restore NaN for the blanks introduced above. replace() returns a new frame,
# so the result must be assigned back (it was previously discarded).
df_backend_with_odoo = df_backend_with_odoo.replace('', nan)
del df_backend_with_odoo['description_x']
del df_backend_with_odoo['description_y']
df_backend_with_odoo

### Outer join on the odoo and planday tables

In [None]:
# Outer join: keep Planday rows without an Odoo mapping and Odoo rows without
# a Planday match. Missing values are temporarily filled with '' because
# concatenating a string with NaN would turn the whole description into NaN.
df_planday_with_odoo = merge(
    left=df_planday,
    right=df_odoo,
    left_on='id_planday',
    right_on='id_planday_in_odoo',
    how='outer'
).fillna('')

# Both inputs carry a 'description' column, so merge suffixes them _x / _y;
# fold them back into a single column.
# NOTE(review): the backend join uses ' | ' as separator where this one uses ' ' — confirm which is intended.
df_planday_with_odoo['description'] = df_planday_with_odoo['description_x'] + ' ' + df_planday_with_odoo['description_y']

# Restore NaN for the blanks introduced above. replace() returns a new frame,
# so the result must be assigned back (it was previously discarded).
df_planday_with_odoo = df_planday_with_odoo.replace('', nan)
del df_planday_with_odoo['description_x']
del df_planday_with_odoo['description_y']
df_planday_with_odoo

### Inner join on the two intermediate tables
__Note__: the nature of the join is irrelevant. This could also be an outer, left or right join.

In [None]:
# Join the two intermediate tables on their shared Odoo id. Every other column
# present on both sides is disambiguated with a suffix naming the join it
# came from.
df = df_backend_with_odoo.merge(
    df_planday_with_odoo,
    how='inner',
    on='id_odoo',
    suffixes=('_from_backend_join', '_from_planday_join'),
)
df

### Delete redundant columns

In [None]:
# The Odoo columns exist in both intermediate tables, so the join produced
# each of them twice (once per suffix). Keep one copy of each.
# drop() returns a new frame: `df` is left untouched, so this cell can be
# re-run without a KeyError (the previous `new_df = df` was only an alias and
# the deletions mutated `df` itself).
new_df = df.drop(
    [
        'id_planday_in_odoo_from_backend_join',
        'id_backend_in_odoo_from_planday_join',
        'info_odoo_from_backend_join',
        'description_from_backend_join',
    ],
    axis=1,
)
new_df

### Rename columns for clarity

In [None]:
# Strip the join suffixes from the surviving columns in a single in-place rename.
new_df.rename(
    columns={
        'id_backend_in_odoo_from_backend_join': 'id_backend_in_odoo',
        'id_planday_in_odoo_from_planday_join': 'id_planday_in_odoo',
        'info_odoo_from_planday_join': 'info_odoo',
        'description_from_planday_join': 'description',
    },
    inplace=True,
)
new_df

### Re-order the columns and rows

In [None]:
# Display layout: Odoo fields first, then backend, then Planday, description last.
ordered_columns = [
    'id_odoo',
    'info_odoo',
    'id_backend_in_odoo',
    'id_planday_in_odoo',
    'id_backend',
    'info_backend',
    'id_planday',
    'info_planday',
    'description'
]
# DataFrame.reindex_axis was deprecated and later removed from pandas;
# reindex(columns=...) selects and orders the same columns.
# The result is only displayed: new_df itself is not modified.
new_df.reindex(columns=ordered_columns).sort_values(by=['id_odoo', 'id_backend', 'id_planday'])

## Store the test cases inside a small database
Use python's [__dataset__](http://dataset.readthedocs.org/en/latest/) library to store test cases inside an sqlite database. This is more or less an excuse to try out this nifty little package. It also has two advantages:
* Multiple test cases can be stored inside one (sqlite) file
* Test cases can be edited visually from your favorite database GUI (e.g. PyCharm)

In [4]:
from pandas import read_csv
from dataset import connect

In [5]:
db = connect ('sqlite:///../../tests/assets/test_db.sqlite')

In [6]:
# Load the semicolon-separated test cases, indexed by their 'Index' column,
# with missing values replaced by empty strings.
test_cases_df = read_csv(
    '../../tests/assets/sync_police_test_cases.csv',
    sep=';',
    index_col='Index',
).fillna('')
test_cases_df

OSError: File b'../../tests/assets/sync_police_test_cases.csv' does not exist

In [None]:
test_cases_dict = test_cases_df.to_dict(orient='records')
test_cases_dict

In [None]:
test_cases_table = db['junk']

In [None]:
test_cases_table.insert_many(test_cases_dict)

## Test the YAML config file structure

In [None]:
from yaml import load, load_all, safe_load_all
from pprint import pprint

# Read the whole file first so parsing can happen after the file is closed.
with open('/Users/loicjounot/Code/internal-tools/integration/configuration.yaml') as f:
    text = f.read()

# safe_load_all only builds plain Python objects. load_all without an explicit
# Loader can construct arbitrary objects and is rejected by current PyYAML.
# It yields one object per YAML document in the file, lazily.
configs = safe_load_all(text)

for config in configs:
    pprint(config)

### A simplified approach to merging tables
The above solution doesn't work. Let's try to implement our own merging algorithm. The basic insight is that the test case table we start with __is__ actually the solution we're looking for. Which means that we need to construct a table where at least one of the keys (__planday__, __backend__ or __odoo__) is present on each row. This however assumes __all three keys are unique__, otherwise things fall apart.

## Register class methods using decorators
This is essentially me learning meta-programming. I'm trying to implement an elegant solution to specify which columns of a dataframe to report and possibly rename the column at the same time. See [stackoverflow](http://stackoverflow.com/questions/5707589/calling-functions-by-array-index-in-python/5707605#5707605) for more information.

In [None]:
def makeRegistrar():
    """Create a decorator that records every function it decorates.

    Returns:
        A decorator. Each decorated function is stored under its ``__name__``
        in a dict exposed as the decorator's ``all`` attribute, and is handed
        back unmodified.
    """
    known_functions = {}

    def registrar(func):
        # Register only, no wrapping: the caller gets the very same object back.
        known_functions.update({func.__name__: func})
        return func

    setattr(registrar, 'all', known_functions)
    return registrar

In [None]:
# One registrar instance for this experiment; decorated functions land in reg.all.
reg = makeRegistrar()
@reg
def f1(a):
    """Return ``a + 1``; stored in ``reg.all`` under the key ``'f1'``."""
    return a+1

In [None]:
def fN(n):
    """Look up the registered function named ``'f' + str(n)`` in ``reg.all``.

    Raises:
        KeyError: if no function with that name has been registered.
    """
    registered_name = 'f' + str(n)
    return reg.all[registered_name]

In [None]:
fN(1)(2)

In [None]:
def all_f():
    """Return the dict exposed as ``reg.all`` (registered name -> function)."""
    return reg.all
all_f()

OK now that's the basic principle, let's see if I can apply that to my problem...

In [None]:
# My function needs to return a pandas Series and the registry needs to return a list of Series
from pandas import Series

def my_column():
    """Return a pandas Series holding the integers 5 through 9."""
    values = range(5, 10)
    return Series(values)

my_column()

In [None]:
# Shared by every export_as(...) call. A registry created inside export_as
# would be brand new on each call, so `export_as(...).all` would never show
# what earlier decorators registered.
_EXPORTED_NAMES = {}


def export_as(new_name):
    """Build a decorator that records ``new_name`` as the export name of a function.

    Args:
        new_name: name to associate with the decorated function.

    Returns:
        A decorator that stores ``new_name`` under the decorated function's
        ``__name__`` and returns the function unmodified. Its ``all``
        attribute is the registry shared by all decorators built here.
    """
    def registrar(func):
        _EXPORTED_NAMES[func.__name__] = new_name
        return func  # registered, not wrapped
    registrar.all = _EXPORTED_NAMES
    return registrar

In [None]:
@export_as('hello')
def my_column():
    """Return a Series of the integers 5 through 9; decorated with the export name 'hello'."""
    return Series(range(5, 10))

In [None]:
export_as('hello').all

I looked for inspiration at Flask. Because somehow the route decorator does something close. Here it is:

In [None]:
def route(rule, **options):
    """Excerpt of Flask's ``route`` decorator factory, kept here for reference.

    Returns a decorator that passes ``f`` to ``self.add_url_rule`` and hands
    ``f`` back unchanged.

    NOTE(review): ``self`` is neither a parameter nor defined in this cell, so
    applying the returned decorator here raises NameError. Presumably this was
    copied from a method of the Flask application class — confirm before reuse.
    """
    def decorator(f):
        # 'endpoint' is removed from options so it is not passed twice below.
        endpoint = options.pop('endpoint', None)
        self.add_url_rule(rule, endpoint, f, **options)
        return f
    return decorator

In [None]:
def export_as(new_name):
    """Build a decorator that tags a function for export under ``new_name``.

    The decorated function is returned unchanged apart from two new
    attributes: ``is_exported`` (always ``True``) and ``field`` (``new_name``).
    """
    def tag_for_export(f):
        f.is_exported = True
        f.field = new_name
        return f
    return tag_for_export

In [None]:
@export_as('hello')
def my_function():
    """Placeholder used to inspect the attributes set by ``export_as``."""
    pass

In [None]:
hasattr(my_function, 'is_exported')

In [None]:
my_function.field

In [None]:
dir()

In [None]:
def needle_in_haystack(a, namespace=None):
    """Print the export name of every exported object among the names in ``a``.

    Args:
        a: iterable of names (strings), e.g. the result of ``dir()``.
        namespace: mapping used to resolve each name to an object; defaults
            to this module's ``globals()``.

    The list of names is printed first, then the ``field`` attribute of each
    resolved object that has an ``is_exported`` attribute. Names missing from
    ``namespace`` are skipped.
    """
    if namespace is None:
        namespace = globals()
    print(a)
    for item in a:
        # ``a`` holds names, not objects: resolve each one before inspecting
        # it (``getattr(item)`` with a single argument raises TypeError).
        candidate = namespace.get(item)
        if hasattr(candidate, 'is_exported'):
            print(getattr(candidate, 'field'))
needle_in_haystack(dir())

In [None]:
globals()

In [1]:
uri.format(**db)
            conn = connect(uri)
            cur = conn.cursor()
            cur.execute(test)
            result = cur.fetchone()
            cur.close()
            conn.close()

NameError: name 'uri' is not defined

In [1]:
from pandas import DataFrame

In [2]:
df = DataFrame()

In [10]:
from datetime import datetime
# A single datetime.now() value is assigned to the whole column, so every row
# gets the same timestamp.
odoo_df['timestamp'] = datetime.now()
odoo_df

Unnamed: 0,External ID,Active,Planday username,Backend UUID,Job Position,Salary ID,Driver App username,Name,Fleet/Name,timestamp
0,__export__.res_partner_2303,True,,6f1596ed-cc2d-4f53-b2b1-405e830285b9,Driver,279198,aftabs,Aftab Shafqat,birmingham,2016-01-18 12:21:32.006602
1,__export__.res_partner_3010,True,,86638e4b-8646-44f8-a4b7-ca1cb6d0d094,Valk Driver Leicester,284845,aaka,Aamir Karim,leicester,2016-01-18 12:21:32.006602
2,__export__.res_partner_1877,True,,f14b717a-c873-4550-af43-61a590542502,Driver,274578,AaYa,Aamir Yaqoob,london_zone_20,2016-01-18 12:21:32.006602
3,__export__.res_partner_3749,True,,,Driver,,Ajohnson,Aaron Johnson,manchester,2016-01-18 12:21:32.006602
4,__export__.res_partner_989,True,,aa7d7e68-5b27-40b5-9e86-c5245fe8cc39,Driver,240035,apervaiz,Aasim Pervaiz,birmingham,2016-01-18 12:21:32.006602
5,__export__.res_partner_623,True,,92a58abf-c431-41d1-a144-9724b8ad6e0b,Driver,227979,sabbas,"Abbas, Sam",nottingham,2016-01-18 12:21:32.006602
6,__export__.res_partner_3116,True,,a42d1cf3-1c7a-4019-bf8d-217f4271065c,Driver,286568,azeboudj,Abdel Malek Zeboudj,london_zone_j,2016-01-18 12:21:32.006602
7,__export__.res_partner_1432,True,,5e44a4b3-6abe-40ca-a08d-c15553fa7e88,Driver,268112,ahassan3,Abdihamid Hassan,,2016-01-18 12:21:32.006602
8,__export__.res_partner_1143,True,,5f922c71-9f5e-49b4-8af2-d121578bd833,Driver,259419,aahmed,Abdul Ahmed,birmingham,2016-01-18 12:21:32.006602
9,__export__.res_partner_1227,True,,3c0036ff-5b3d-45fa-8400-f44de120f112,Driver,261550,abdulali,Abdul Aziz Ali,birmingham,2016-01-18 12:21:32.006602


In [None]:
def write_to_excel():
    """Stub for exporting the joined tables to an Excel workbook.

    Not implemented yet: only the commented-out sketch below exists. The
    docstring gives the function a body, since a ``def`` followed only by
    comments is a syntax error.
    """
    # DEFAULT_OUTPUT_DIR = join(expanduser('~'), 'sync_police')
    # excel_filepath = join(DEFAULT_OUTPUT_DIR, 'test.xlsx')
    # writer = ExcelWriter(excel_filepath)
    # planday_with_odoo.to_csv()
    # planday_with_odoo.to_excel(writer, 'planday with odoo')
    # backend_with_odoo.to_excel(writer, 'backend with odoo')
    # writer.save()