In [1]:
import configparser
import unittest
import datetime
import sys
import pandas as pd

# from tqdm import tqdm_notebook

sys.path.append("/home/sergey/drclinics/common")
from universal_connection import UniversalConnection, DBType

sys.path.append("/home/sergey/drclinics/reports")
from report_utils import excel_report

import numpy as np
from log import log

In [2]:
sql = '''
select a.id appoint_id,
	   p.person_id,
	   per.phone,
	   --per.partner_id,
	   --par."name", 
	   case 
        	when promo.description_tsvector = 'телемедицина_вэб' then 'VEBMED'
        	else 'ПРОЧИЕ' 
       end "name",
	   DATE(a.finished + 3 * interval '1 hour')
from appointment a
left join patient p on a.patient_id = p.id 
left join person per on p.person_id = per.id 
left join promotion promo on a.promotion_id = promo.id
where a.finished at time zone 'UTC' > '2020-03-30'
and a.finished < timezone('UTC', now() - (extract(hour from now())) * interval '1 hour' - (extract(minute from now())) * interval '1 minute' - (extract(second from now())) * interval '1 second')
and a.good
	and not (
			a.patient_id in (
							select patient_id from patient_categories pcat
							inner join reference rf 
							on rf.id = pcat.reference_id
							where rf.code = 'TEST'
			)
			or lower(a.report_comment) = 'тест'
	)

'''

In [3]:
connection = UniversalConnection('../../../.credentials/telemed/prom.cfg', DBType.Postgres)

2020-04-09 16:46:49 connect to postgres database using config file "../../../.credentials/telemed/prom.cfg"
2020-04-09 16:46:49 creating ssh tunnel to 172.16.100.19 as root...
2020-04-09 16:46:59 connect postgres using parameters:
                    database: telemed
				    user: norekhov
				    password: ***masked***
				    host: localhost
				    port: 43543
2020-04-09 16:46:59 @telemed: execute sql:
				    SET TIME ZONE 'Europe/Moscow'
				    None


In [4]:
data = connection.query(sql)

2020-04-09 16:46:59 @telemed query:
                    select a.id appoint_id,
				    	   p.person_id,
				    	   per.phone,
				    	   --per.partner_id,
				    	   --par."name", 
				    	   case 
				            	when promo.description_tsvector = 'телемедицина_вэб' then 'VEBMED'
				            	else 'ПРОЧИЕ' 
				           end "name",
				    	   DATE(a.finished + 3 * interval '1 hour')
				    from appointment a
				    left join patient p on a.patient_id = p.id 
				    left join person per on p.person_id = per.id 
				    left join promotion promo on a.promotion_id = promo.id
				    where a.finished at time zone 'UTC' > '2020-03-30'
				    and a.finished < timezone('UTC', now() - (extract(hour from now())) * interval '1 hour' - (extract(minute from now())) * interval '1 minute' - (extract(second from now())) * interval '1 second')
				    and a.good
				    	and not (
				    			a.patient_id in (
				    							select patient_id from patient_categories pcat
				    						

In [5]:
connection.close()

2020-04-09 16:47:00 @telemed: closing postgres connection...
2020-04-09 16:47:00 closing ssh tunnel to 172.16.100.19...


In [6]:
df = pd.DataFrame(data)

In [7]:
df.head()

Unnamed: 0,appoint_id,person_id,phone,name,date
0,137211,1585718,79091582287,ПРОЧИЕ,2020-04-08
1,127919,2546091,79518565014,VEBMED,2020-03-31
2,136013,3201622,79089255484,ПРОЧИЕ,2020-04-07
3,128137,1141636,79180989263,ПРОЧИЕ,2020-03-31
4,135952,3201364,79911116364,VEBMED,2020-04-07


In [8]:
def phone_base():
    base = pd.read_excel('phone_base.xlsm')
    
    column_names = ['Region', 'Code', 'useless', 'start', 'end', 'fullstart', 'fullend', 'Region-2']
    base.columns = column_names
    
    return base

In [9]:
def get_region_by_phone(phone, base=phone_base()):
    if type(phone) != str:
        phone = str(phone)
    phone = ''.join(i for i in phone if i.isdigit())    
    if phone[0] == '8' or phone[0] == '7':
        phone = phone[1:]
    if len(phone) != 10:
        return 'Неизвестный регион'
    
    #if int(phone[0:3]) in list(base.Code.unique()):
    return base.loc[(base['Code'] == int(phone[0:3])) & 
                          (base['start'] <= int(phone[3:])) &
                          (base['end'] >= int(phone[3:]))
                         ]['Region'].to_string(index=False).strip()
    #else: 
    #    return 'Неизвестный регион'
        

In [10]:
#get_region_by_phone('9011009999')

In [11]:
#get_region_by_phone('+79260001133')

In [12]:
#get_region_by_phone('7(928)997-00-00')

In [13]:
#get_region_by_phone(89011009999)

In [14]:
df_test = pd.DataFrame(df.head())

In [15]:
df_test['region'] = df_test['phone'].apply(get_region_by_phone)

In [16]:
df_test

Unnamed: 0,appoint_id,person_id,phone,name,date,region
0,137211,1585718,79091582287,ПРОЧИЕ,2020-04-08,Московская область
1,127919,2546091,79518565014,VEBMED,2020-03-31,Воронежская область
2,136013,3201622,79089255484,ПРОЧИЕ,2020-04-07,Свердловская область
3,128137,1141636,79180989263,ПРОЧИЕ,2020-03-31,Краснодарский край
4,135952,3201364,79911116364,VEBMED,2020-04-07,"Series([], )"


In [17]:
df_test.loc[4]['region']

'Series([], )'

In [18]:
df_test.replace('Series([], )', 'Неизвестный регион')

Unnamed: 0,appoint_id,person_id,phone,name,date,region
0,137211,1585718,79091582287,ПРОЧИЕ,2020-04-08,Московская область
1,127919,2546091,79518565014,VEBMED,2020-03-31,Воронежская область
2,136013,3201622,79089255484,ПРОЧИЕ,2020-04-07,Свердловская область
3,128137,1141636,79180989263,ПРОЧИЕ,2020-03-31,Краснодарский край
4,135952,3201364,79911116364,VEBMED,2020-04-07,Неизвестный регион


In [19]:
df['region'] = df['phone'].apply(get_region_by_phone)

In [20]:
df = df.replace('Series([], )', 'Неизвестный регион')

In [21]:
df.head(15)

Unnamed: 0,appoint_id,person_id,phone,name,date,region
0,137211,1585718,79091582287,ПРОЧИЕ,2020-04-08,Московская область
1,127919,2546091,79518565014,VEBMED,2020-03-31,Воронежская область
2,136013,3201622,79089255484,ПРОЧИЕ,2020-04-07,Свердловская область
3,128137,1141636,79180989263,ПРОЧИЕ,2020-03-31,Краснодарский край
4,135952,3201364,79911116364,VEBMED,2020-04-07,Неизвестный регион
5,127960,2626621,79149123969,ПРОЧИЕ,2020-03-31,Иркутская область
6,127116,2632032,79002066203,ПРОЧИЕ,2020-03-31,Неизвестный регион
7,135890,792842,79172240600,ПРОЧИЕ,2020-04-07,Республика Татарстан
8,131310,3084235,79166063464,VEBMED,2020-04-04,Московская область
9,127866,3056218,79608725544,ПРОЧИЕ,2020-03-31,Волгоградская область


In [22]:
reg_names = [name for name in df.region.unique()]

In [23]:
reg_names.sort()

In [24]:
reg_names.insert(0, 'Всего консультаций')

In [25]:
#reg_names

In [26]:
#reg_names.index('Incorrect phone')

In [27]:
d = dict()

In [28]:
for i in range(df.shape[0]):
    d[df['date'][i]] = d.get(df['date'][i], [0 for name in reg_names]) 
    d[df['date'][i]][reg_names.index(df['region'][i])] += 1
    d[df['date'][i]][0] += 1

In [29]:
#d

In [30]:
report = pd.DataFrame.from_dict(d, orient='index').sort_index()

In [31]:
report.columns = reg_names

In [32]:
report

Unnamed: 0,Всего консультаций,Алтайский край,Амурская область,"Архангельская область, Ненецкий АО",Астраханская область,Белгородская область,Брянская область,Владимирская область,Волгоградская область,Вологодская область,...,Тюменская область,Ульяновская область,Хабаровский Край,Ханты-Мансийский АО,Челябинская область,Чеченская Республика,"Читинская область, Агинский Бурятский АО",Чувашская Республика,Ямало-Ненецкий АО,Ярославская область
2020-03-30,526,3,2,0,0,8,0,5,3,0,...,2,2,5,3,7,0,1,1,3,2
2020-03-31,571,2,0,2,2,3,0,2,7,1,...,1,1,1,4,10,1,1,4,4,3
2020-04-01,702,1,1,3,0,7,1,6,6,1,...,6,4,3,6,5,2,0,2,2,9
2020-04-02,736,3,1,5,4,4,3,9,8,4,...,4,5,3,3,3,0,0,2,1,13
2020-04-03,843,8,2,9,1,5,5,5,5,2,...,3,4,3,4,9,1,4,2,2,5
2020-04-04,744,3,1,3,0,6,1,6,3,0,...,3,2,8,3,8,0,1,6,0,7
2020-04-05,737,4,0,4,1,9,4,5,10,1,...,4,2,3,5,11,1,5,3,1,11
2020-04-06,1001,9,2,3,1,7,3,12,5,5,...,2,5,5,2,13,0,6,2,5,14
2020-04-07,951,6,2,3,0,11,1,2,4,7,...,6,4,8,4,11,1,2,4,3,22
2020-04-08,1136,8,2,0,2,7,0,8,8,6,...,7,7,7,8,5,1,1,2,2,16


In [51]:
d2 = dict()

In [52]:
dates = [date for date in df.date.unique()]

In [53]:
dates.sort()

In [54]:
dates

[datetime.date(2020, 3, 30),
 datetime.date(2020, 3, 31),
 datetime.date(2020, 4, 1),
 datetime.date(2020, 4, 2),
 datetime.date(2020, 4, 3),
 datetime.date(2020, 4, 4),
 datetime.date(2020, 4, 5),
 datetime.date(2020, 4, 6),
 datetime.date(2020, 4, 7),
 datetime.date(2020, 4, 8)]

In [55]:
for i in range(df.shape[0]):
    d2[df['region'][i]] = d2.get(df['region'][i], [0 for date in dates]) 
    d2[df['region'][i]][dates.index(df['date'][i])] += 1
    #d2[df['region'][i]][0] += 1

In [56]:
#d2

In [57]:
report2 = pd.DataFrame.from_dict(d2, orient='index').sort_index()

In [58]:
report2.columns = dates

In [59]:
report2['total'] = report2.sum(axis=1)

In [60]:
report2.sort_values('total', ascending=False).head(25)

Unnamed: 0,2020-03-30,2020-03-31,2020-04-01,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,total
Неизвестный регион,145,120,174,171,192,201,202,291,259,290,2045
Московская область,120,163,178,189,219,172,159,213,213,229,1855
Ленинградская область,39,50,61,52,57,49,51,59,46,52,516
Республика Татарстан,8,14,10,10,17,13,42,44,29,35,222
Ивановская область,5,4,7,4,6,6,6,6,37,119,200
Свердловская область,7,12,16,21,18,14,15,15,16,19,153
Республика Башкортостан,10,8,19,9,6,28,19,20,18,15,152
Краснодарский край,8,19,19,12,13,10,9,22,13,23,148
Нижегородская область,12,9,12,13,12,14,10,15,14,14,125
Самарская область,6,9,13,16,16,15,14,11,9,14,123


In [230]:
report2 = report2.sort_values('total', ascending=False)

In [232]:
report2.to_excel('app_regions_2.xls')

----

# WEBMED

In [113]:
d3 = dict()

In [114]:
dates = [date for date in df.date.unique()]
dates.sort()

In [115]:
#df['region'][1]

In [116]:
for i in range(df.shape[0]):
    if df['name'][i] == 'VEBMED':
        #region_name = df[]
        d3[df['region'][i] + ' VEBMED'] = d3.get(df['region'][i]  + ' VEBMED', [0 for date in dates]) 
        d3[df['region'][i] + ' VEBMED'][dates.index(df['date'][i])] += 1
    #d2[df['region'][i]][0] += 1
    
    if df['name'][i] != 'VEBMED':
        d3[df['region'][i] + ' _прочие'] = d3.get(df['region'][i] + ' _прочие', [0 for date in dates]) 
        d3[df['region'][i] + ' _прочие'][dates.index(df['date'][i])] += 1

In [117]:
report3 = pd.DataFrame.from_dict(d3, orient='index').sort_index()

In [118]:
report3.columns = dates

In [119]:
report3['total'] = report3.sum(axis=1)

In [120]:
report3

Unnamed: 0,2020-03-30,2020-03-31,2020-04-01,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,total
Алтайский край VEBMED,0,1,0,1,5,0,2,3,3,2,17
Алтайский край _прочие,3,1,1,2,3,3,2,6,3,6,30
Амурская область VEBMED,0,0,1,1,2,1,0,0,0,2,7
Амурская область _прочие,2,0,0,0,0,0,0,2,2,0,6
"Архангельская область, Ненецкий АО VEBMED",0,0,0,0,0,1,1,2,2,0,6
...,...,...,...,...,...,...,...,...,...,...,...
Чувашская Республика _прочие,1,4,2,2,2,5,3,1,3,1,24
Ямало-Ненецкий АО VEBMED,0,0,0,0,1,0,0,2,1,0,4
Ямало-Ненецкий АО _прочие,3,4,2,1,1,0,1,3,2,2,19
Ярославская область VEBMED,0,3,6,11,4,4,4,14,20,12,78


In [121]:
report3.to_excel('app_regions_3.xls')