## Urbanlytics / Team 23 - Easy Tasks: 1) a-c (local solution)
This notebook contains the answers for Question 1.) a/b/c (questions to marton.szel@lynxanalytics.com and daniel.matzner@lynxanalytics.com)

In [3]:
# Data Path and PArameters
path_in_basefolder = 'C:\\Users\\danma\\OneDrive\\Dokumentumok\\lynx_comp\\s3_copy\\final_solution\\'
path_in_shapefolder = path_in_basefolder + 'shape\\'
path_out_visu = path_in_basefolder + 'visu\\'
path_out_rep = path_in_basefolder + 'rep\\'

In [4]:
# Core libs
import os
import time
from os import listdir
from os.path import isdir, join
import datetime

# ETL-s and Data Preparation
import numpy as np
import pandas as pd

# for GEO analyses and modifying projections
import geopandas as gpd
from shapely.geometry import Point, LineString, MultiPolygon, Polygon, MultiLineString

# for visualization
import matplotlib
import matplotlib.pyplot as plt
#from descartes import PolygonPatch
from matplotlib.collections import PatchCollection
from matplotlib import collections as mc
#import contextily as ctx
import pylab as pl

# Settings for Matplotlib
%matplotlib inline

In [5]:
# Own Functions and udf-s
def _log(s):
    '''
    Printing to the screen with a timestamp
    '''
    print('[' + time.strftime('%a %H:%M:%S') + '] ' + s)


def _shapefile_reader(_shp_folder,
                      rmode='oneFile',
                      extension='.shp',
                      exception_list=[],
                      verbose=False):
    '''
    Reading the shape files from a pre defined folder (_shp_folder). 
    If the mode is 'oneFile', it exports all the shapes in the input folder to a file
    If the mode is dictionary, it will export the shapes into a dictionary, where the key is the filename in the folder
    '''
    if verbose:
        _log('Started reading shapefiles from ' + _shp_folder)

    result_dict = {}

    # Read library, collect shape files
    shapeFiles = [f for f in listdir(_shp_folder) if f.endswith(extension)]

    # Read and append shape files within the library
    for _shapefile in shapeFiles:
        if _shapefile not in exception_list:
            result_dict.update({
                _shapefile.split('.')[0]:
                gpd.read_file(_shp_folder + _shapefile)
            })
            if verbose:
                _log(_shapefile + ' file has {} lines and {} columns'.format(
                    result_dict[_shapefile.split('.')[0]].shape[0],
                    result_dict[_shapefile.split('.')[0]].shape[1]))

    # Append the files to a pandas DataFrame
    _tmp_shp_df = gpd.GeoDataFrame()
    if rmode == 'oneFile':
        result_shp_df = gpd.GeoDataFrame()
        for keys in result_dict.keys():
            result_shp_df = _tmp_shp_df.append(result_dict[keys], sort=True)
        return result_shp_df

    # Return with the dictionary - if the mode is dict mode
    elif rmode == 'dictionary':
        return result_dict

    else:
        return None

### 1. Loading Data Sets
#### 1.1 CSV files

In [6]:
pd_CRM = pd.read_csv(path_in_basefolder + 'crm.csv', sep='\t')
pd_CRM.head(3)

Unnamed: 0,customer_id,customer_age,arpu,model,gender
0,1000018443372088799,,,,
1,1000156262008373076,,,,
2,1000224617298878347,23.0,56.7,Unknown,female


In [7]:
pd_event_log = pd.read_csv(path_in_basefolder + 'event_log.csv', sep='\t', dtype={'date_time':int})
pd_event_log['date_time_2'] = pd.to_datetime(pd_event_log['date_time'], unit='s')
pd_event_log.head(3)

Unnamed: 0,customer_id,grid_id,date_time,date_time_2
0,h092861261790079492,166839.0,1563521400,2019-07-19 07:30:00
1,k438071835109624353,168252.0,1563521460,2019-07-19 07:31:00
2,1481701602817207678,179477.0,1563521760,2019-07-19 07:36:00


In [8]:
pd_merchants = pd.read_csv(path_in_basefolder + 'Merchant_list.csv', sep='\t')
pd_merchants.head(3)

Unnamed: 0,MerchantCode,MerchantName,MerchantCategory,grid_id
0,0001ak3c5il5,Retail Chain 6,Retail,167546.0
1,0001c35xpel5,Retail Chain 38,Retail,164737.0
2,000831n1c55,Retail Chain 38,Retail,178079.0


In [9]:
pd_past_results = pd.read_csv(path_in_basefolder + 'past_result.csv', sep='\t')
pd_past_results.head(2)

Unnamed: 0,customer_id,campaign_outcome
0,7278271474546681529,Not Interested
1,z296370477958436978,Not Interested


In [10]:
pd_poi = pd.read_csv(path_in_basefolder + 'poi.csv', sep=',')
pd_poi.head(2)

Unnamed: 0,Category,SubCategory,Latitude,Longitude,POIName,fb
0,amenity,cafe,2.924346,101.639315,Poolside Cafe,1
1,amenity,community_centre,2.932361,101.647073,Dewan Masjid Raja Haji Fisabilillah,0


In [11]:
pd_poi.groupby(['Category', 'SubCategory', 'POIName']).agg(
    {'Latitude':'count'}).rename(columns={'Latitude':'shp_cnt'}).reset_index().to_csv('POI_Categories_2.csv',index=False)

In [12]:
pd_poi[['POIName']].drop_duplicates().to_csv('POI_Categories.csv')

In [13]:
pd_potential_users = pd.read_csv(path_in_basefolder + 'potential_users.csv', sep=',')
pd_potential_users.head(2)

Unnamed: 0,customer_id
0,1004944629214078488
1,1007484341585366249


In [14]:
pd_transaction = pd.read_csv(path_in_basefolder + 'transaction.csv', sep='\t', dtype={'date_time':int})
pd_transaction['date_time_2'] = pd.to_datetime(pd_transaction['date_time'], unit='s')
pd_transaction.head(2)

Unnamed: 0,customer_id,MerchantCode,MerchantCategory,date_time,GTV,date_time_2
0,r43045724262114812,pmbmkbipk,Transit & Travel,1563536760,31.45,2019-07-19 11:46:00
1,165324620748557838,92r201n2a0iy,Household Goods and Groceries,1563536760,17.25,2019-07-19 11:46:00


In [15]:
pd_transport = pd.read_csv(path_in_basefolder + 'Transport_location.csv', sep=',', dtype={'date_time':int})
pd_transport['date_time_2'] = pd.to_datetime(pd_transaction['date_time'], unit='s')
pd_transport.head(2)

Unnamed: 0,POI Name,Latitude,Longitude,date_time_2
0,Gocar Station 1,2.90689,101.65628,2019-07-19 11:46:00
1,Gocar Station 2,2.91419,101.65499,2019-07-19 11:46:00


In [16]:
pd_transport[['POI Name']].drop_duplicates().to_csv('transport_Categories.csv')

#### 1.2 Shape files

In [17]:
shp_civerjaya = _shapefile_reader(path_in_shapefolder, verbose=True)
#shp_civerjaya['grid_id'] = shp_civerjaya.grid_id.astype('int')
shp_civerjaya.head()

[Fri 16:08:17] Started reading shapefiles from C:\Users\danma\OneDrive\Dokumentumok\lynx_comp\s3_copy\final_solution\shape\
[Fri 16:08:17] shape.shp file has 574 lines and 2 columns


Unnamed: 0,geometry,grid_id
0,"POLYGON ((101.63912 2.88681, 101.63706 2.88759...",159117.0
1,"POLYGON ((101.64137 2.88629, 101.64070 2.88621...",159118.0
2,"POLYGON ((101.64362 2.88655, 101.64137 2.88629...",159119.0
3,"POLYGON ((101.64586 2.88681, 101.64362 2.88655...",159120.0
4,"POLYGON ((101.64811 2.88707, 101.64586 2.88681...",159121.0


In [18]:
shp_civerjaya = gpd.GeoDataFrame(shp_civerjaya)
shp_civerjaya.crs = {'init':'epsg:4326'}
shp_civerjaya = shp_civerjaya.reset_index().drop(['index'], axis=1)

## Easy Tasks
### Task 1.a) 
In recent years, the population and visitors of Cyberjaya has grown tremendously. Hence, Cyberview is embarking on a data-driven, approach to anticipate and plan for future infrastructure and services. To assist in this endeavour, the town planners would like to get a rough estimate of the current number of visitors. Calculate average daily subscribers rounded to 2 decimal places for grid IDs in Cyberjaya listed below for the time period between 2019-07-19 to 2019-08-18 during 0700 to 1500

In [19]:
#Load data
event_log=pd.read_csv('event_log.csv', sep='\t')
event_log.head()

Unnamed: 0,customer_id,grid_id,date_time
0,h092861261790079492,166839.0,1563521000.0
1,k438071835109624353,168252.0,1563521000.0
2,1481701602817207678,179477.0,1563522000.0
3,5368934103184310830,170354.0,1563522000.0
4,u832408493379095560,175968.0,1563522000.0


In [20]:
# Get date
task_1a_event_log = pd_event_log.copy()

task_1a_event_log['just_date'] = pd_event_log['date_time_2'].dt.date

# Get hour
task_1a_event_log['hours'] = task_1a_event_log['date_time_2'].dt.hour

# Filters: Filter hour and selected dates

# good filter: event_1a=event_log.set_index('date_time_2').between_time('7:.00','15:00').set_index('customer_id')

task_1a_event_log = task_1a_event_log[(task_1a_event_log.hours >= 7) & (task_1a_event_log.hours <= 15) &
                                      (task_1a_event_log.just_date >= datetime.date(year=2019, month=7, day=19)) &
                                      (task_1a_event_log.just_date <= datetime.date(year=2019, month=8, day=18))]

task_1a_event_agg_00 = task_1a_event_log.groupby(['customer_id', 'just_date', 'grid_id']).agg(
    {'date_time': 'count'}).rename(columns={'date_time': 'cnt_1c'}).reset_index()
task_1a_event_agg_00['cust_cnt'] = 1

task_1a_event_agg_10 = task_1a_event_agg_00.groupby(
    ['just_date', 'grid_id']).agg({'cust_cnt': 'sum'}).reset_index()


# average versus divind by days
task_1a_event_agg_20 = task_1a_event_agg_10.groupby(
    ['grid_id']).agg({'cust_cnt': 'sum'}).reset_index()
task_1a_event_agg_20['daily_cust'] = task_1a_event_agg_20.cust_cnt / 31

grid_ids = [172456, 172457, 171754, 171755,
            171756, 171052, 171053, 171054, 171058, 170356]
task_1a_result = task_1a_event_agg_20[task_1a_event_agg_20.grid_id.isin(
    grid_ids)][['grid_id', 'daily_cust']]

task_1a_result

Unnamed: 0,grid_id,daily_cust
319,170356.0,135.741935
339,171052.0,18.741935
340,171053.0,23.677419
341,171054.0,66.677419
345,171058.0,31.516129
364,171754.0,34.483871
365,171755.0,22.548387
366,171756.0,77.741935
388,172456.0,34.516129
389,172457.0,30.645161


In [21]:
task_1a_result.sort_values(by='grid_id').to_csv('res_1a.csv')

### Task 1.b)
Part of the growth has been attributed to proliferation of the F&B industry. Identify all the Grid ID’s with F&B POIs using the datasets provided

In [22]:
task_1b_prep = pd_merchants[pd_merchants.MerchantCategory == "F&B"].copy()
task_1b_prep.head()

Unnamed: 0,MerchantCode,MerchantName,MerchantCategory,grid_id
4,0012ia2l2l9,F&B Individual 367,F&B,167544.0
13,0022n29iio1,F&B Individual 319,F&B,169651.0
15,002lc0l7k18,F&B Chain 57,F&B,173867.0
19,003al37c1lm6,F&B Chain 15,F&B,170342.0
20,003i77nc5he0,F&B Individual 240,F&B,166844.0


In [23]:
#task_1b_prep.MerchantCode.value_counts()

This is not the 1b result, as we will calculate the good answer later, under the 1c point. We will mark it as that point.

In [24]:
task_1b_result = task_1b_prep[['grid_id']].drop_duplicates()
#task_1b_result.to_csv('res_1b.csv')

In [25]:
#task_1b_result[task_1b_result.grid_id.apply(lambda x: int(str(x)[-1]) != 0)]

In [26]:
#pd_transaction.head()

In [27]:
#pd_transaction[pd_transaction.MerchantCategory == "F&B"].MerchantCode.nunique()

In [28]:
task_1b_result['FB_flag'] = 1
task_1b_result.head(2)

Unnamed: 0,grid_id,FB_flag
4,167544.0,1
13,169651.0,1


### Task 1.c)
Boost is also interested in expanding its footprint in Cyberjaya’s F&B industry as it has a significant number of userbase in Cyberjaya. The ability to identify the most popular F&B locations during lunch hour (1200 to 1400) allows for Boost to refine its F&B merchant acquisition strategy and run better campaigns. From the Grid ID’s identified in Question 1b), state the top 10 Grid ID’s with the highest number of Boost users from 1200 to 1400 and provide the count.

In [29]:
pd_poi['geometry'] = list(zip(pd_poi.Longitude, pd_poi.Latitude))
pd_poi['geometry'] = pd_poi['geometry'].apply(lambda x: Point(x))
shp_poi = gpd.GeoDataFrame(pd_poi)
shp_poi.crs = {'init':'epsg:4326'}
shp_poi = shp_poi.reset_index().drop(['index'], axis=1)
shp_poi.head()

Unnamed: 0,Category,SubCategory,Latitude,Longitude,POIName,fb,geometry
0,amenity,cafe,2.924346,101.639315,Poolside Cafe,1,POINT (101.63931 2.92435)
1,amenity,community_centre,2.932361,101.647073,Dewan Masjid Raja Haji Fisabilillah,0,POINT (101.64707 2.93236)
2,amenity,kindergarten,2.918134,101.63863,SK Cyberjaya,0,POINT (101.63863 2.91813)
3,amenity,parking,2.90738,101.65584,Cyberview Solar Farm Car Park,0,POINT (101.65584 2.90738)
4,amenity,parking,2.930377,101.641317,Faculty of management (FOM) Staff Car Park @ M...,0,POINT (101.64132 2.93038)


In [30]:
shp_poi_extended = gpd.sjoin(shp_poi, shp_civerjaya, how='left')
shp_poi_extended.head(2)
#shp_poi_extended.to_csv('poi_with_coord.csv')

Unnamed: 0,Category,SubCategory,Latitude,Longitude,POIName,fb,geometry,index_right,grid_id
0,amenity,cafe,2.924346,101.639315,Poolside Cafe,1,POINT (101.63931 2.92435),339.0,171052.0
1,amenity,community_centre,2.932361,101.647073,Dewan Masjid Raja Haji Fisabilillah,0,POINT (101.64707 2.93236),415.0,173161.0


In [31]:
poi_filter = shp_poi_extended[shp_poi_extended.fb > 0][['grid_id']].drop_duplicates()
poi_filter['fnb_grid'] = 1
poi_filter.head()

Unnamed: 0,grid_id,fnb_grid
0,171052.0,1
19,169653.0,1
20,171759.0,1
21,168249.0,1
22,168950.0,1


In [32]:
task_1c_base = pd_transaction.copy()
task_1c_base = pd.merge(task_1c_base, pd_merchants[['MerchantCode', 'grid_id']], how='left', on='MerchantCode')
task_1c_base = pd.merge(task_1c_base, poi_filter, how='left', on='grid_id').fillna({'fnb_grid':0})
task_1c_base = pd.merge(task_1c_base, task_1b_result, how='left', on='grid_id').fillna({'FB_flag':0})
task_1c_base.head()

Unnamed: 0,customer_id,MerchantCode,MerchantCategory,date_time,GTV,date_time_2,grid_id,fnb_grid,FB_flag
0,r43045724262114812,pmbmkbipk,Transit & Travel,1563536760,31.45,2019-07-19 11:46:00,176673.0,0.0,1.0
1,165324620748557838,92r201n2a0iy,Household Goods and Groceries,1563536760,17.25,2019-07-19 11:46:00,175259.0,0.0,1.0
2,h233548585934022368,3h00c55uk01,Household Goods and Groceries,1563536760,88.0,2019-07-19 11:46:00,171748.0,0.0,1.0
3,6074516273378802718,110o7is610eu,F&B,1563536760,50.82,2019-07-19 11:46:00,171758.0,0.0,1.0
4,860214065958232817,50013tcm50aa,Household Goods and Groceries,1563536760,16.97,2019-07-19 11:46:00,177367.0,0.0,1.0


In [33]:
fb_trn = task_1c_base[task_1c_base.MerchantCategory == "F&B"][['grid_id']].drop_duplicates()
fb_trn['FB_flag_trn'] = 1
fb_trn.head(2)

Unnamed: 0,grid_id,FB_flag_trn
3,171758.0,1
5,169663.0,1


In [34]:
task_1c_base = pd.merge(task_1c_base, fb_trn, how='left', on='grid_id').fillna({'FB_flag_trn':0})
task_1c_base.head(2)

Unnamed: 0,customer_id,MerchantCode,MerchantCategory,date_time,GTV,date_time_2,grid_id,fnb_grid,FB_flag,FB_flag_trn
0,r43045724262114812,pmbmkbipk,Transit & Travel,1563536760,31.45,2019-07-19 11:46:00,176673.0,0.0,1.0,1.0
1,165324620748557838,92r201n2a0iy,Household Goods and Groceries,1563536760,17.25,2019-07-19 11:46:00,175259.0,0.0,1.0,1.0


In [35]:
print(task_1c_base.shape[0])
print(pd_transaction.shape[0])
task_1c_base[task_1c_base.FB_flag > 0].MerchantCategory.value_counts()

228354
228354


F&B                              110114
Transit & Travel                  35741
Retail                            26766
Household Goods and Groceries     25066
Services                           8299
Name: MerchantCategory, dtype: int64

This is the final solution of 1b)

In [36]:
res_1b_alternative = task_1c_base[(task_1c_base.FB_flag > 0) | (task_1c_base.fnb_grid > 0)][['grid_id']].drop_duplicates()
res_1b_alternative.to_csv('res_1b_v2.csv')

In [37]:
res_1b_alternative

Unnamed: 0,grid_id
0,176673.0
1,175259.0
2,171748.0
3,171758.0
4,177367.0
...,...
17220,166840.0
19243,172460.0
24580,168246.0
38986,171052.0


In [38]:
task_1c_base.groupby(['fnb_grid', 'FB_flag', 'FB_flag_trn']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,customer_id,MerchantCode,MerchantCategory,date_time,GTV,date_time_2,grid_id
fnb_grid,FB_flag,FB_flag_trn,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0,0.0,0.0,21490,21490,21490,21490,21490,21490,21490
0.0,1.0,1.0,199830,199830,199830,199830,199830,199830,199830
1.0,0.0,0.0,878,878,878,878,878,878,878
1.0,1.0,1.0,6156,6156,6156,6156,6156,6156,6156


In [39]:
task_1c_base = task_1c_base[((task_1c_base.FB_flag > 0) | (task_1c_base.fnb_grid > 0)) & 
                            (task_1c_base.date_time_2.dt.time >= datetime.time(12,0,0)) & (task_1c_base.date_time_2.dt.time <= datetime.time(14,0,0))]

task_1c_base.head(2)

Unnamed: 0,customer_id,MerchantCode,MerchantCategory,date_time,GTV,date_time_2,grid_id,fnb_grid,FB_flag,FB_flag_trn
645,k59343132662801596,03e8005ti3t,F&B,1563537600,24.43,2019-07-19 12:00:00,171760.0,0.0,1.0,1.0
646,k944698484231758385,n0o150g0p35c,Retail,1563537600,24.65,2019-07-19 12:00:00,164034.0,0.0,1.0,1.0


In [40]:
task_1c_base[(task_1c_base.fnb_grid > 0) & (task_1c_base.FB_flag == 0)].head(2)

Unnamed: 0,customer_id,MerchantCode,MerchantCategory,date_time,GTV,date_time_2,grid_id,fnb_grid,FB_flag,FB_flag_trn
1020,q676485508413304516,103k50l05ce,Household Goods and Groceries,1563537840,312.09,2019-07-19 12:04:00,168248.0,1.0,0.0,0.0
1632,7582844331202318793,103k50l05ce,Household Goods and Groceries,1563538320,312.09,2019-07-19 12:12:00,168248.0,1.0,0.0,0.0


In [41]:
task_1c_base_agg_00 = task_1c_base.groupby(['customer_id', 'grid_id']).agg(
    {'date_time':'count'}).rename(columns={'date_time':'cnt_1c'}).reset_index()
task_1c_base_agg_00['cust_cnt'] = 1

task_1c_base_agg_10 = task_1c_base_agg_00.groupby(['grid_id']).agg({'cust_cnt':'sum', 'cnt_1c':'sum'}).reset_index()

task_1c_base_agg_10.head()

Unnamed: 0,grid_id,cust_cnt,cnt_1c
0,159117.0,13,13
1,159122.0,192,199
2,159817.0,110,110
3,159818.0,200,203
4,159820.0,208,217


In [42]:
task_1c_result = task_1c_base_agg_10.sort_values(by='cust_cnt', ascending=False)[['grid_id', 'cust_cnt']].head(10)
task_1c_result

Unnamed: 0,grid_id,cust_cnt
371,176673.0,2870
252,171745.0,2190
296,173160.0,1955
167,168253.0,1657
88,165435.0,1319
130,166847.0,1237
52,163334.0,1233
240,171054.0,1042
299,173165.0,1036
67,164041.0,1027


This is not the final solution of 1c), as it has just only got 15%. The uploaded version will be the next.

In [43]:
task_1c_result.to_csv('res_1c.csv')

### 1.C - final solution

In [44]:
good_grid = pd.read_csv('res_1b_v2.csv')
good_grid = good_grid[['grid_id']]
good_grid['good_grid'] = 1
good_grid.head(2)

Unnamed: 0,grid_id,good_grid
0,176673.0,1
1,175259.0,1


In [45]:
task_1c_base = pd.merge(pd_transaction, pd_merchants[['MerchantCode', 'grid_id']], how='left', on='MerchantCode')
task_1c_base_20 = pd.merge(task_1c_base, good_grid, how='left', on='grid_id').fillna({'good_grid':0})
task_1c_base_20['date_day'] = task_1c_base_20['date_time_2'].dt.date
task_1c_base_20.head()

Unnamed: 0,customer_id,MerchantCode,MerchantCategory,date_time,GTV,date_time_2,grid_id,good_grid,date_day
0,r43045724262114812,pmbmkbipk,Transit & Travel,1563536760,31.45,2019-07-19 11:46:00,176673.0,1.0,2019-07-19
1,165324620748557838,92r201n2a0iy,Household Goods and Groceries,1563536760,17.25,2019-07-19 11:46:00,175259.0,1.0,2019-07-19
2,h233548585934022368,3h00c55uk01,Household Goods and Groceries,1563536760,88.0,2019-07-19 11:46:00,171748.0,1.0,2019-07-19
3,6074516273378802718,110o7is610eu,F&B,1563536760,50.82,2019-07-19 11:46:00,171758.0,1.0,2019-07-19
4,860214065958232817,50013tcm50aa,Household Goods and Groceries,1563536760,16.97,2019-07-19 11:46:00,177367.0,1.0,2019-07-19


In [46]:
task_1c_base_agg_00 = task_1c_base_20.groupby(['date_day', 'customer_id', 'grid_id']).agg(
    {'date_time':'count'}).rename(columns={'date_time':'cnt_1c'}).reset_index()
task_1c_base_agg_00['cust_cnt'] = 1

task_1c_base_agg_10 = task_1c_base_agg_00.groupby(['date_day', 'grid_id']).agg({'cust_cnt':'sum'}).reset_index()
task_1c_base_agg_20 = task_1c_base_agg_10.groupby(['grid_id']).agg({'cust_cnt':'max'}).reset_index()

task_1c_base_agg_20.head()

Unnamed: 0,grid_id,cust_cnt
0,159117.0,4
1,159118.0,16
2,159121.0,9
3,159122.0,21
4,159817.0,22


In [47]:
task_1c_result_v2 = task_1c_base_agg_20.sort_values(by='cust_cnt', ascending=False)[['grid_id', 'cust_cnt']].head(10)
task_1c_result_v2

Unnamed: 0,grid_id,cust_cnt
486,176673.0,1633
329,171745.0,256
131,165439.0,214
385,173160.0,213
226,168253.0,208
164,166833.0,199
178,166847.0,173
79,163334.0,164
404,173859.0,148
60,162629.0,144


This is the uploaded solution for 1C, as it got 50%

In [48]:
task_1c_result_v2.to_csv('res_1c_v3.csv')

## END