In [1]:
import configparser
import unittest
import datetime
import sys
import pandas as pd

# from tqdm import tqdm_notebook

sys.path.append("/home/sergey/drclinics/common")
from universal_connection import UniversalConnection, DBType

sys.path.append("/home/sergey/drclinics/reports")
from report_utils import excel_report

import numpy as np
from log import log

In [2]:
# Report query: one row per appointment that passes the filters below.
#
# Selected columns:
#   * DATE(a.finished + 3 hours) - calendar date of the finish time after a
#     fixed +3h shift. NOTE(review): presumably UTC -> Moscow time; confirm,
#     because the connection log also shows SET TIME ZONE 'Europe/Moscow'.
#   * is_chat / is_video / is_audio / is_phone - whether the reference code
#     'CHAT' / 'VIDEO' / 'AUDIO' / 'PHONE' is among the connection types
#     linked to the appointment (same correlated subquery repeated four times).
#   * "source" - selected as is (unqualified column name).
#   * doctor_org - 'self' when the doctor's person record points at the partner
#     named 'МК Доктор рядом', otherwise 'external'.
#   * promo_type - 'VEBMED' when promotion.description_tsvector equals
#     'телемедицина_вэб', otherwise 'ПРОЧИЕ'.
#   * person.phone - phone of the person record behind the patient.
#
# Filters: a.finished (at time zone 'UTC') later than the hard-coded
# '2020-03-30', a.good is true, and the appointment is neither for a patient
# in the 'TEST' category nor has report_comment equal to 'тест'.
#
# NOTE(review): product_condition (alias pc) is joined but never referenced.
# NOTE(review): the lines marked DEBUG inside the string are SQL comments and
# are sent to the server as part of the query text.
sql = """
select
    DATE(a.finished + 3 * interval '1 hour'),
    'CHAT' in (
        select reference.code
        from appointment_connection_type
        left join reference on reference.id = appointment_connection_type.reference_id
        where a.id = appointment_connection_type.appointment_id
        ) as is_chat,
    'VIDEO' in (
        select reference.code
        from appointment_connection_type
        left join reference on reference.id = appointment_connection_type.reference_id
        where a.id = appointment_connection_type.appointment_id
        ) as is_video,
    'AUDIO' in (
        select reference.code
        from appointment_connection_type
        left join reference on reference.id = appointment_connection_type.reference_id
        where a.id = appointment_connection_type.appointment_id
        ) as is_audio,
    'PHONE' in (
        select reference.code
        from appointment_connection_type
        left join reference on reference.id = appointment_connection_type.reference_id
        where a.id = appointment_connection_type.appointment_id
        ) as is_phone,
    "source",
    case
        when pe.partner_id in (
            select id from partner
            where name = 'МК Доктор рядом'
            )
        then 'self'
        else 'external'
    end doctor_org,
    case
        when p.description_tsvector = 'телемедицина_вэб' then 'VEBMED'
        else 'ПРОЧИЕ'
    end promo_type,
    person.phone 
    -- DEBUG
    --, a.*
from appointment a
left join promotion p on a.promotion_id = p.id
left join product_condition pc on pc.id = p.product_condition_id
left join doctor d on d.id = a.doctor_id
left join person pe on pe.id = d.person_id
left join patient on a.patient_id = patient.id 
left join person on patient.person_id = person.id
where
-- DEBUG
-- a.finished at time zone 'UTC' between '2020-04-02' and '2020-04-02 19:00:00'
   a.finished at time zone 'UTC' > '2020-03-30'
  and a.good
  and not (
    a.patient_id in (
        select patient_id
        from patient_categories pcat
        inner join reference rf
           on rf.id=pcat.reference_id
        where rf.code='TEST'
        )
    or lower(a.report_comment)='тест'
    )
"""

In [3]:
# Open a Postgres connection configured by the given credentials file.
# Per the log output of this cell, the helper opens an SSH tunnel first and
# sets the session time zone to 'Europe/Moscow' right after connecting.
# NOTE(review): the connection is closed in a separate cell further down, so it
# stays open if the query cell in between fails.
connection = UniversalConnection('../../../.credentials/telemed/prom.cfg', DBType.Postgres)

2020-04-17 13:13:10 connect to postgres database using config file "../../../.credentials/telemed/prom.cfg"
2020-04-17 13:13:10 creating ssh tunnel to 172.16.100.19 as root...
2020-04-17 13:13:37 connect postgres using parameters:
                    database: telemed
				    user: norekhov
				    password: ***masked***
				    host: localhost
				    port: 38179
2020-04-17 13:13:37 @telemed: execute sql:
				    SET TIME ZONE 'Europe/Moscow'
				    None


In [4]:
# Run the report query over the open connection and keep the raw result.
data = connection.query(sql)

2020-04-17 13:13:37 @telemed query:
                    select
				        DATE(a.finished + 3 * interval '1 hour'),
				        'CHAT' in (
				            select reference.code
				            from appointment_connection_type
				            left join reference on reference.id = appointment_connection_type.reference_id
				            where a.id = appointment_connection_type.appointment_id
				            ) as is_chat,
				        'VIDEO' in (
				            select reference.code
				            from appointment_connection_type
				            left join reference on reference.id = appointment_connection_type.reference_id
				            where a.id = appointment_connection_type.appointment_id
				            ) as is_video,
				        'AUDIO' in (
				            select reference.code
				            from appointment_connection_type
				            left join reference on reference.id = appointment_connection_type.reference_id
				            where a.id = appointment_connection_type.a

In [5]:
# Release the database connection; per the log output of this cell this also
# shuts down the SSH tunnel.
connection.close()

2020-04-17 13:13:38 @telemed: closing postgres connection...
2020-04-17 13:13:38 closing ssh tunnel to 172.16.100.19...


In [6]:
# Wrap the raw query result in a DataFrame for the processing steps below.
df = pd.DataFrame(data)

In [7]:
df.head()

Unnamed: 0,date,is_chat,is_video,is_audio,is_phone,source,doctor_org,promo_type,phone
0,2020-04-08,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79091582287
1,2020-03-31,True,False,False,False,QUEUED,self,VEBMED,79518565014
2,2020-04-15,False,False,False,True,SCHEDULED,external,VEBMED,79687101977
3,2020-04-07,False,False,False,True,QUEUED,external,ПРОЧИЕ,79089255484
4,2020-03-31,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79180989263


In [8]:
def phone_base(path='DEF.csv'):
    """Load the phone-number range table used to resolve a region.

    The CSV is read with pandas' default settings and its header row is
    replaced by fixed column names, so the file must contain exactly six
    columns in this order: code, start, end, capacity, operator, region.

    Parameters
    ----------
    path : str or path-like, default 'DEF.csv'
        Location of the CSV file. The default keeps the historical behaviour
        of reading 'DEF.csv' from the current working directory.

    Returns
    -------
    pandas.DataFrame
        The table with columns
        ['code', 'start', 'end', 'capacity', 'operator', 'region'].

    Raises
    ------
    ValueError
        If the file does not have exactly six columns (raised by pandas when
        the new column names are assigned).
    """
    base = pd.read_csv(path)
    base.columns = ['code', 'start', 'end', 'capacity', 'operator', 'region']
    return base

In [9]:
def get_region_by_phone(phone, base=None):
    """Resolve the region a phone number belongs to.

    The number is reduced to its ASCII digits; an 11-digit number starting
    with '7' or '8' loses that leading digit. The remaining 10 digits are split
    into a 3-digit code and a 7-digit subscriber number, which is looked up in
    ``base`` (rows where ``code`` matches and ``start <= number <= end``).

    Parameters
    ----------
    phone : str, int, float or None
        Phone number in any common notation ('+7 (909) 158-22-87',
        79091582287, ...). Missing values (None / NaN) are accepted.
    base : pandas.DataFrame, optional
        Range table with at least the columns 'code', 'start', 'end' and
        'region'. When omitted, the table returned by ``phone_base()`` is
        loaded on first use and cached on this function, so the CSV is read
        once rather than on every call (and not at definition time).

    Returns
    -------
    str
        The region of the first matching range (surrounding whitespace
        stripped), or 'Неизвестный регион' when the number is missing,
        malformed, or not covered by any range.
    """
    unknown = 'Неизвестный регион'

    if base is None:
        # Lazily load and memoise the default table; re-defining the function
        # (e.g. re-running the notebook cell) resets the cache.
        base = getattr(get_region_by_phone, '_default_base', None)
        if base is None:
            base = phone_base()
            get_region_by_phone._default_base = base

    if not isinstance(phone, str):
        if pd.isna(phone):
            return unknown
        # A column containing missing values is often upcast to float, which
        # would otherwise render as '79091582287.0' and gain a bogus digit.
        if isinstance(phone, float) and phone.is_integer():
            phone = int(phone)
        phone = str(phone)

    # Keep ASCII digits only: str.isdigit() also accepts characters such as
    # superscripts that int() cannot parse.
    digits = ''.join(ch for ch in phone if ch in '0123456789')

    # Drop the country / trunk prefix only from full 11-digit numbers, so a
    # 10-digit number is never shortened by mistake.
    if len(digits) == 11 and digits[0] in '78':
        digits = digits[1:]
    if len(digits) != 10:
        return unknown

    code = int(digits[:3])
    number = int(digits[3:])
    matches = base.loc[
        (base['code'] == code) & (base['start'] <= number) & (base['end'] >= number),
        'region',
    ]
    if matches.empty:
        # Return the sentinel directly instead of the textual repr of an
        # empty Series.
        return unknown
    # Overlapping ranges: take the first match instead of a multi-line string.
    return str(matches.iloc[0]).strip()

In [10]:
# Derive a region for every row from its phone number (one Python-level call
# of get_region_by_phone per row).
df['region'] = df['phone'].apply(get_region_by_phone)

In [11]:
# Turn the literal text 'Series([], )' into the "unknown region" label.
# NOTE(review): that text looks like what pandas' Series.to_string() yields for
# an empty selection, i.e. a phone that matched no range - confirm.
# The replacement is applied to every column of the frame, not only 'region'.
df = df.replace('Series([], )', 'Неизвестный регион')

In [12]:
df

Unnamed: 0,date,is_chat,is_video,is_audio,is_phone,source,doctor_org,promo_type,phone,region
0,2020-04-08,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79091582287,г. Москва и Московская область
1,2020-03-31,True,False,False,False,QUEUED,self,VEBMED,79518565014,Воронежская обл.
2,2020-04-15,False,False,False,True,SCHEDULED,external,VEBMED,79687101977,г. Москва и Московская область
3,2020-04-07,False,False,False,True,QUEUED,external,ПРОЧИЕ,79089255484,Свердловская обл.
4,2020-03-31,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79180989263,Краснодарский край
...,...,...,...,...,...,...,...,...,...,...
16128,2020-04-16,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79268753130,г. Москва и Московская область
16129,2020-04-16,False,False,True,False,QUEUED,self,VEBMED,79028197547,Ханты - Мансийский - Югра АО
16130,2020-04-16,False,False,False,True,SCHEDULED,external,VEBMED,79618187336,Республика Адыгея
16131,2020-04-16,False,False,False,True,SCHEDULED,external,VEBMED,79206741100,Ивановская обл.


In [13]:
# df.to_excel('raw_data.xls')

In [13]:
# Add the consultation_type column, filled with an empty-string placeholder.
df['consultation_type'] = ''

In [14]:
df.head()

Unnamed: 0,date,is_chat,is_video,is_audio,is_phone,source,doctor_org,promo_type,phone,region,consultation_type
0,2020-04-08,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79091582287,г. Москва и Московская область,
1,2020-03-31,True,False,False,False,QUEUED,self,VEBMED,79518565014,Воронежская обл.,
2,2020-04-15,False,False,False,True,SCHEDULED,external,VEBMED,79687101977,г. Москва и Московская область,
3,2020-04-07,False,False,False,True,QUEUED,external,ПРОЧИЕ,79089255484,Свердловская обл.,
4,2020-03-31,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79180989263,Краснодарский край,


In [15]:
# Collapse the four channel flags into a single consultation type.
# Priority when several flags are set: video > audio > chat.
# Everything else - including rows with no flag at all - is reported as
# 'phone' (explicit business requirement; there is no 'unknown' bucket).
#
# Vectorised on purpose: the former per-row `df[col][i] = value` chained
# assignment triggers SettingWithCopyWarning, does not write through under
# pandas Copy-on-Write, and required the index to be exactly 0..n-1.
# astype(bool) mirrors the truthiness test of the former `if` chain.
df['consultation_type'] = np.select(
    [
        df['is_video'].astype(bool),
        df['is_audio'].astype(bool),
        df['is_chat'].astype(bool),
    ],
    ['video', 'audio', 'chat'],
    default='phone',
)


In [16]:
df.head(25)

Unnamed: 0,date,is_chat,is_video,is_audio,is_phone,source,doctor_org,promo_type,phone,region,consultation_type
0,2020-04-08,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79091582287,г. Москва и Московская область,phone
1,2020-03-31,True,False,False,False,QUEUED,self,VEBMED,79518565014,Воронежская обл.,chat
2,2020-04-15,False,False,False,True,SCHEDULED,external,VEBMED,79687101977,г. Москва и Московская область,phone
3,2020-04-07,False,False,False,True,QUEUED,external,ПРОЧИЕ,79089255484,Свердловская обл.,phone
4,2020-03-31,False,False,False,True,SCHEDULED,external,ПРОЧИЕ,79180989263,Краснодарский край,phone
5,2020-04-07,False,False,False,True,SCHEDULED,external,VEBMED,79911116364,Ставропольский край,phone
6,2020-03-31,False,False,True,False,QUEUED,external,ПРОЧИЕ,79149123969,Иркутская обл.,audio
7,2020-03-31,False,False,True,False,ACTIVE,self,ПРОЧИЕ,79002066203,Свердловская обл.,audio
8,2020-04-07,False,False,False,True,QUEUED,self,ПРОЧИЕ,79172240600,Республика Татарстан,phone
9,2020-04-04,True,False,False,False,SCHEDULED,external,VEBMED,79166063464,г. Москва и Московская область,chat


In [17]:
df.consultation_type.unique()

array(['phone', 'chat', 'audio', 'video'], dtype=object)

In [68]:
#df.loc[1]['consultation_type']

In [18]:
# Number of consultations per (date, region, promo type, consultation type).
# The count is taken over non-null 'source' values and exposed as 'quantity'.
group_keys = ['date', 'region', 'promo_type', 'consultation_type']
df_group = (
    df.groupby(group_keys)['source']
    .count()
    .reset_index(name='quantity')
)

In [23]:
df_group.head(10)

Unnamed: 0,date,region,promo_type,consultation_type,quantity
0,2020-03-30,Алтайский край,ПРОЧИЕ,audio,2
1,2020-03-30,Алтайский край,ПРОЧИЕ,chat,1
2,2020-03-30,Алтайский край,ПРОЧИЕ,phone,1
3,2020-03-30,Амурская обл.,ПРОЧИЕ,audio,1
4,2020-03-30,Амурская обл.,ПРОЧИЕ,phone,1
5,2020-03-30,Астраханская обл.,ПРОЧИЕ,audio,1
6,2020-03-30,Белгородская обл.,ПРОЧИЕ,audio,1
7,2020-03-30,Белгородская обл.,ПРОЧИЕ,chat,4
8,2020-03-30,Белгородская обл.,ПРОЧИЕ,phone,3
9,2020-03-30,Владимирская обл.,ПРОЧИЕ,audio,1


In [21]:
#df[(df['date'] == df_group.loc[0]['date']) & (df['region'] == 'Алтайский край')]

In [22]:
#df[df['region'] == 'Ивановская обл.']

In [106]:
#df.groupby(['date','region', 'promo_type', 'is_chat', 'is_video', 'is_audio', 'is_phone'])['phone'].count().to_frame().reset_index()

In [98]:
#d3 = dict()

In [99]:
#dates = [date for date in df.date.unique()]
#dates.sort()

In [24]:
# Export the row-level data and the grouped counts into one workbook,
# one sheet each.
#
# The writer is used as a context manager so the file is finalised and closed
# even if one of the exports raises; ExcelWriter.save() was deprecated in
# pandas 1.5 and removed in pandas 2.0.
#
# NOTE(review): the xlsxwriter engine produces the XLSX format while the file
# name ends in '.xls'; the name is kept unchanged here in case something
# downstream expects it - consider renaming to '.xlsx'.
file_name = 'regions-consultation_type.xls'
with pd.ExcelWriter(file_name, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Raw_data', index=False)
    df_group.to_excel(writer, sheet_name='Group_by', index=False)