In [1]:
import re
import pandas as pd

import requests
from bs4 import BeautifulSoup
from selenium import webdriver

import folium
import geopandas as gpd

from tqdm import tqdm_notebook as tqdmn

import datetime

In [2]:
pd.set_option('display.max_columns', 60)

ecosys = pd.read_csv('ecosys.csv', delimiter=';', encoding='Windows-1251')
ecosys.loc[83] = ['7736249247', 'ООО "Центр недвижимости от Сбербанка"/ДомКлик']
ecosys.loc[84] = ['7736207543', 'Яндекс']
ecosys = ecosys.astype({'ИНН' : 'str'})
ecosys.head()

Unnamed: 0,ИНН,Название
0,7736279160,"ООО ""ОБЛАЧНЫЕ ТЕХНОЛОГИИ"""
1,7804568396,"ООО ""ДИАЛОГ"""
2,7736264044,"ООО ""СБЕРБАНК-ТЕЛЕКОМ"""
3,9706000111,"ООО ""ИННОВАЦИОННАЯ МЕДИЦИНА"""
4,9710049920,"ООО ""ДОКДОК ТЕРРИТОРИЯ ЗДОРОВЬЯ"""


In [3]:
def DF_from_list(locs, ecosys_indx):
    
    df = pd.DataFrame.from_records(locs)
    df.columns = ['address', 'latitude', 'longitude']
    df['ИНН'] = ecosys['ИНН'][ecosys_indx]
    df['Название'] = ecosys['Название'][ecosys_indx]
    df['Дата добавления'] = datetime.date.today().strftime("%d-%m-%Y")
    
    return df

In [4]:
def draw_map(dataframe, zoom_start = 3):

    dataframe = dataframe.astype({'latitude':'float64', 'longitude':'float64'}).copy()
    
    folium_map = folium.Map(location=[dataframe.latitude.mean(), 
                                      dataframe.longitude.mean()],
                                      zoom_start=zoom_start) 
    
    for i in range(len(dataframe)):
        
            folium.Marker(location=[dataframe.loc[i,'latitude'],
                                    dataframe.loc[i,'longitude']],
                                    tooltip=dataframe.loc[i, 'address']).add_to(folium_map)
            
    return folium_map

## SberЛогистика

In [5]:
req = requests.get('https://sberlogistics.ru/about/regions')
soup = BeautifulSoup(req.text, 'html.parser')

In [6]:
tag_p = soup.find_all('p', attrs={'class':'color-green'}) 

urls = []
for p in tag_p:
    for tag_a in p.find_all('a'):
        urls.append(tag_a['href'])

In [7]:
locations = []
for url in urls:
    req = requests.get('https://sberlogistics.ru'+url)
    soup = BeautifulSoup(req.text, 'html.parser')
    city = soup.h2.string
    addr = str(soup.find('div', {'class':'wrap item-grid item-grid-3'}).find_all('p')[1])
    addr = re.search(u'<br/>(.+?)</p>', addr).group(1)
    locations.append('{0}, {1}'.format(city, addr))

In [8]:
google_map_url = 'https://www.google.com/maps/search/'
Url_With_Coordinates = []

option = webdriver.ChromeOptions()
prefs = {'profile.default_content_setting_values': {'images':2, 'javascript':2}}
option.add_experimental_option('prefs', prefs)

driver = webdriver.Chrome("D:\\ML\\sber\\chromedriver_win32\\chromedriver.exe", options=option)

for loc in tqdmn(locations, leave=False):
    driver.get(google_map_url + loc)
    Url_With_Coordinates.append(driver.find_element_by_css_selector('meta[itemprop=image]').get_attribute('content'))
    
driver.close()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for loc in tqdmn(locations, leave=False):


  0%|          | 0/83 [00:00<?, ?it/s]

In [9]:
lat = re.findall(r'center=(\d+.\d+)%2C', ''.join(Url_With_Coordinates))
long = re.findall(r'%2C(\d+.\d+)&zoom=', ''.join(Url_With_Coordinates))

if len(long) != len(lat):
    lat = []
    long = []
    for url in Url_With_Coordinates:
        try:
            long.append(re.findall(r'%2C(\d+.\d+)&zoom=', url)[0])
            lat.append(re.findall(r'center=(\d+.\d+)', url)[0])
        except:
            lat.append(0)
            long.append(0)

In [10]:
idx = ecosys[ecosys['Название'] == 'ООО "СБЕРЛОГИСТИКА"'].index
sber_logistic = DF_from_list(list(zip(locations, lat, long)), idx)
sber_logistic.drop(sber_logistic[(sber_logistic.latitude==0) | (sber_logistic.longitude==0)].index, axis=0, inplace=True)

In [11]:
draw_map(sber_logistic)

In [12]:
#sber_logistic.to_csv('sber_logistic.csv')

## Расчет расстояний между объектами 

In [13]:
def nearby_partners(gpd1, gpd2, radius=2000):
    
    gpd1 = gpd1.to_crs(epsg=3857).copy()
    gpd2 = gpd2.to_crs(epsg=3857).copy()
    
    def within_radius(x):
        dists = gpd2.distance(x)
        nearby = (dists <= radius).astype(int)
        return sum(nearby)
    
    gpd1['n'] = gpd1.geometry.apply(within_radius)
    
    return gpd1.to_crs(epsg=4326)

In [14]:
sber = pd.read_csv('sber(2).csv', delimiter=',')
sber.drop('Unnamed: 0', axis=1, inplace=True)
sber.head()

Unnamed: 0,address,longitude,latitude
0,"АО Ненецкий, г Нарьян-Мар, ул им В.И.Ленина,...",53.017146,67.639572
1,"АО Ханты-Мансийский, г Белоярский, мкр 3, 1",64.807693,60.125902
2,"АО Ханты-Мансийский, г Когалым, ул Молодежна...",74.47988,62.26361
3,"АО Ханты-Мансийский, г Лангепас, ул Комсомол...",75.186204,61.25286
4,"АО Ханты-Мансийский, г Лянтор, мкр 6-й, стр13",72.171621,61.615684


In [15]:
gpd_partners = gpd.GeoDataFrame(geometry = gpd.points_from_xy(sber_logistic.longitude, sber_logistic.latitude), crs=4326)
gpd_partners.head()

Unnamed: 0,geometry
0,POINT (91.42490 53.71606)
1,POINT (40.55865 64.54899)
2,POINT (48.02758 46.34512)
3,POINT (83.65005 53.33744)
4,POINT (36.56759 50.61879)


In [16]:
gpd_sber = gpd.GeoDataFrame(geometry = gpd.points_from_xy(sber.longitude, sber.latitude), crs=4326)
gpd_sber.head()

Unnamed: 0,geometry
0,POINT (53.01715 67.63957)
1,POINT (64.80769 60.12590)
2,POINT (74.47988 62.26361)
3,POINT (75.18620 61.25286)
4,POINT (72.17162 61.61568)


In [17]:
n_partners = nearby_partners(gpd_sber, gpd_partners, radius=1000)

In [18]:
df_columns = ['distance', 'address_sber', 'longitude_sber', 'latitude_sber', 'address_partner', 'longitude_partner', 'latitude_partner', 'partner_INN', 'partner_name']
df = pd.DataFrame(columns=df_columns)
for index in tqdmn(n_partners[n_partners.n>0].index, leave=False):
    dists = gpd_partners.to_crs(epsg=3857).distance(gpd_sber.to_crs(epsg=3857).loc[index].geometry)
    temp_df = sber_logistic[dists <= 1000].copy()
    temp_df['distance'] = dists[dists <= 1000].values
    temp_df.rename(columns={'longitude': 'longitude_partner', 'latitude':'latitude_partner', 'ИНН':'partner_INN', 'Название':'partner_name', 'address':'address_partner'}, inplace=True)
    temp_df['address_sber'] = sber.loc[index, 'address']
    temp_df['latitude_sber'] = sber.loc[index, 'latitude']
    temp_df['longitude_sber'] = sber.loc[index, 'longitude']
    temp_df = temp_df[df_columns].copy()
    df = df.append(temp_df, ignore_index=True).copy()
    del temp_df

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for index in tqdmn(n_partners[n_partners.n>0].index, leave=False):


  0%|          | 0/25 [00:00<?, ?it/s]

In [19]:
df.head()

Unnamed: 0,distance,address_sber,longitude_sber,latitude_sber,address_partner,longitude_partner,latitude_partner,partner_INN,partner_name
0,405.299318,"г Москва, г Москва, ул Вавилова, 19",37.579959,55.700275,"Москва, 119334, Москва, Вавилова, 24 к1",37.57719,55.7016069,,
1,370.836044,"край Забайкальский, г Чита, ул Бутина, 28",113.503525,52.037577,"Чита, ул. Костюшко-Григоровича, д. 5, пом. 5",113.5016694,52.0358751,,
2,574.027484,"край Забайкальский, г Чита, ул Журавлева, 2...",113.496904,52.034663,"Чита, ул. Костюшко-Григоровича, д. 5, пом. 5",113.5016694,52.0358751,,
3,899.493905,"край Забайкальский, г Чита, ул Ленина, 126,...",113.493668,52.036568,"Чита, ул. Костюшко-Григоровича, д. 5, пом. 5",113.5016694,52.0358751,,
4,719.240448,"край Ставропольский, г Ставрополь, пр-кт Кул...",41.915821,45.053162,"Ставрополь, ул. 3-я Промышленная д.3",41.9134202,45.0573997,,


In [20]:
#df.to_csv('.csv')

#with pd.ExcelWriter('Dists.xlsx') as writer:
#    df.to_excel(writer, sheet_name='List 1')