In [1]:
import os
import math
import pickle
import json
import gzip
import time
import calendar
from datetime import datetime
import numpy as np
import pandas as pd
from datetime import date
from multiprocessing import Pool
import warnings
warnings.filterwarnings('ignore')
PKL = './pkl/'  # folder where the pickled, pre-processed frames are written and read back
SETS = './sets/' # folder containing the gzipped dataset files
LONG = 0.0159 # longitude offset for a distance of 1 km east or west of a point (presumably in degrees — TODO confirm)
LAT = 0.009 # latitude offset for a distance of 1 km north or south of a point (presumably in degrees — TODO confirm)

In [2]:
def default_serializer(obj):
    """Fallback for ``json.dumps(default=...)`` that renders dates in ISO format.

    Args:
        obj: the object the JSON encoder could not serialize by itself.

    Returns:
        ``obj.isoformat()`` when ``obj`` is a ``date`` (or a subclass of it).

    Raises:
        TypeError: for every other type, naming the offending class.
    """
    if not isinstance(obj, date):
        type_name = obj.__class__.__name__
        raise TypeError(f'Object of type {type_name} is not JSON serializable')
    return obj.isoformat()
    

def load_dataset(filename):
    """Read ``<SETS><filename>.gz`` (gzip-compressed JSON) into a DataFrame.

    Args:
        filename: dataset name without the ``.gz`` extension.

    Returns:
        A ``pd.DataFrame`` built from the decoded JSON, or ``None`` when the
        archive does not exist.
    """
    archive_path = SETS + filename + '.gz'
    if not os.path.exists(archive_path):
        # Missing archive: callers receive None, not an exception.
        return None
    with gzip.open(archive_path, 'rb') as gzip_ref:
        records = json.load(gzip_ref)
        return pd.DataFrame(records)
        
    
def save_dataset(filename):
    """Dump the global DataFrame called ``filename`` to ``<SETS><filename>.gz``.

    The frame is looked up by name in the module globals, converted to a list of
    records, serialized as JSON (dates through ``default_serializer``) and written
    gzip-compressed. Nothing happens when no such global exists or it is ``None``.

    Args:
        filename: name of the global variable, also used as the archive base name.
    """
    frame = globals().get(filename)
    if frame is None:
        return
    records = frame.to_dict(orient='records')
    json_text = json.dumps(records, ensure_ascii=False, default=default_serializer)
    with gzip.open(SETS + filename + '.gz', 'wb') as gzip_file:
        gzip_file.write(json_text.encode('utf-8'))
    print(f'Сохранено {len(records)} записей в {filename}.gz')
        

def distance(lon1, lat1, lon2, lat2):
    """
    Haversine distance in kilometres between two points given by GPS coordinates.

    Longitudes and latitudes are passed in degrees; the Earth is treated as a
    sphere with a radius of 6371 km. The numpy calls are element-wise, so arrays
    are accepted as well as scalars.
    """
    earth_radius_km = 6371
    sin_half_dlat = np.sin(np.radians(lat2 - lat1) / 2)
    sin_half_dlon = np.sin(np.radians(lon2 - lon1) / 2)
    cos_lat1 = np.cos(np.radians(lat1))
    cos_lat2 = np.cos(np.radians(lat2))
    # Haversine term; the multiplication order is kept so rounding stays the same.
    hav = sin_half_dlat * sin_half_dlat + cos_lat1 * cos_lat2 * sin_half_dlon * sin_half_dlon
    central_angle = 2 * np.arctan2(np.sqrt(hav), np.sqrt(1 - hav))
    return earth_radius_km * central_angle


def _coordinate_pairs(flat):
    """Split a flat ``[lon, lat, lon, lat, ...]`` sequence into ``(lon, lat)`` tuples.

    A trailing element that has no partner is dropped.
    """
    return [(flat[k], flat[k + 1]) for k in range(0, len(flat) - 1, 2)]


def km_min(exams, ecology):
    """Proximity score between one exam row and a frame of ecology objects.

    Ecology rows are scanned in order. For every row whose ``datesSet`` shares at
    least one date with the exam's ``datesSet``, each exam point is compared with
    each point of that row. The first pair closer than 1 km ends the search.

    Args:
        exams: mapping/Series with ``datesSet`` (a set of dates) and ``geoList``
            (flat ``[lon, lat, ...]`` list).
        ecology: DataFrame with the same two columns, one ecology object per row.

    Returns:
        ``1 - km`` for the first pair found within 1 km (so closer means a higher
        score), otherwise ``0``.

    NOTE(review): despite the name, this is the first hit in row order, not the
    minimum distance over all rows. That behaviour is preserved here because
    the stored results depend on it.
    """
    # Parse the exam coordinates once; incomplete pairs are dropped, so an
    # unset or stale longitude/latitude is never passed to distance().
    exam_points = _coordinate_pairs(exams['geoList'])
    if not exam_points:
        return 0
    exam_dates = exams['datesSet']

    for _, row in ecology.iterrows():
        # isdisjoint answers the same overlap question as a non-empty
        # intersection, without building a throwaway set for every row.
        if exam_dates.isdisjoint(row['datesSet']):
            continue
        eco_points = _coordinate_pairs(row['geoList'])
        for a_long, a_lat in exam_points:
            for b_long, b_lat in eco_points:
                km = distance(a_long, a_lat, b_long, b_lat)
                if km < 1:
                    return 1 - km
    return 0
    

def _first_floats(node, limit=2):
    """Return up to ``limit`` leading numbers of an arbitrarily nested list, as floats."""
    found = []
    pending = [node]
    while pending and len(found) < limit:
        item = pending.pop()
        if isinstance(item, (list, tuple)):
            pending.extend(reversed(item))  # reversed so pop() walks left to right
        else:
            found.append(float(item))
    return found


def geo_list(geodata):
    """Extract the first ``[longitude, latitude]`` pair from a geometry value.

    Accepted inputs:
      * a dict with a ``coordinates`` key (nested to any depth),
      * a list whose first element is such a dict,
      * a string holding the Python-literal representation of either of these.

    Args:
        geodata: the geometry value as described above.

    Returns:
        A list with the first two coordinate numbers as floats, or ``[]`` when the
        value cannot be interpreted. Failures are printed, not raised.
    """
    import ast  # function-scope import: not part of the notebook's import cell

    if isinstance(geodata, str):
        try:
            # literal_eval only accepts literals, unlike eval, which would execute
            # arbitrary expressions found in the data file.
            geodata = ast.literal_eval(geodata)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
            print(e, geodata)
            return []
        # No elif below: a successfully parsed string must still be unpacked.

    try:
        geometry = geodata[0] if isinstance(geodata, list) else geodata
        if not isinstance(geometry, dict):
            return []
        return _first_floats(geometry['coordinates'])
    except (KeyError, IndexError, TypeError, ValueError) as e:
        print(e, geodata)
        return []


def dates_set(start, end):
    """Return the set of timestamps from ``start`` to ``end`` inclusive.

    The range comes from ``pd.date_range`` with its default frequency; an ``end``
    that precedes ``start`` yields an empty set.
    """
    return {stamp for stamp in pd.date_range(start=start, end=end)}


def sets_intersects(exams_set, start, end):
    """
    Tell whether the period ``start``..``end`` shares at least one date with ``exams_set``.

    Used to shrink the ecology dataset: events whose dates never fall into the
    exam date sets can be discarded.
    """
    event_days = set(pd.date_range(start=start, end=end))
    return bool(event_days.intersection(exams_set))

In [3]:
# Show the full text of every cell and do not truncate the list of columns
pd.options.display.max_colwidth = None
pd.options.display.max_columns = None

In [4]:
%%time
# Загружаем и обрабатываем облегченный вариант датасета экзаменов (не все параметры)
exams_lite = load_dataset('exams_lite')
exams_lite['datesSet'] = exams_lite.apply(lambda x: dates_set(x['start'], x['end']), axis=1)
exams_lite['geoList'] = exams_lite.apply(lambda x: geo_list(x['geoData']), axis=1)

CPU times: user 5.66 s, sys: 140 ms, total: 5.8 s
Wall time: 5.82 s


In [5]:
# Discover the ecology datasets available in SETS.
# - SETS is used so the folder stays in sync with load_dataset.
# - The list is sorted because os.listdir returns entries in arbitrary order.
# - Exactly the '.gz' suffix is stripped, matching the '<name>.gz' path load_dataset builds.
# NOTE(review): the reason these three datasets are skipped is not visible here — confirm.
EXCLUDED_ECOLOGY = {'ecology_digging', 'ecology_live_area', 'ecology_trashes'}
ecology_list = sorted(
    file[:-len('.gz')]
    for file in os.listdir(SETS)
    if file.startswith('ecology_') and file.endswith('.gz')
)
# Filtering, unlike list.remove, does not raise when an excluded archive is absent.
ecology_list = [name for name in ecology_list if name not in EXCLUDED_ECOLOGY]
ecology_list

['ecology_air',
 'ecology_autostations',
 'ecology_bad_gasstations',
 'ecology_capital',
 'ecology_city_places',
 'ecology_clinics',
 'ecology_dirt',
 'ecology_factories',
 'ecology_gasstations',
 'ecology_house_lines',
 'ecology_markets',
 'ecology_mean',
 'ecology_noise',
 'ecology_parks',
 'ecology_roads',
 'ecology_streets',
 'ecology_stroi',
 'ecology_transports']

In [6]:
%%time
ecology = {}
for i in ecology_list:
    print(f'Обработка {i}')
    data = load_dataset(i)
    data['datesSet'] = data.apply(lambda x: dates_set(x['start'], x['end']), axis=1)
    data['geoList'] = data.apply(lambda x: geo_list(x[i.replace('ecology_', '')]), axis=1)
    with open(PKL + i+'.pkl', 'wb') as file:
        pickle.dump(data, file)

Обработка ecology_air
Обработка ecology_autostations
Обработка ecology_bad_gasstations
Обработка ecology_capital
Обработка ecology_city_places
Обработка ecology_clinics
Обработка ecology_dirt
Обработка ecology_factories
Обработка ecology_gasstations
Обработка ecology_house_lines
Обработка ecology_markets
Обработка ecology_mean
Обработка ecology_noise
Обработка ecology_parks
Обработка ecology_roads
Обработка ecology_streets
Обработка ecology_stroi
Обработка ecology_transports
CPU times: user 57.6 s, sys: 2.57 s, total: 1min
Wall time: 1min 3s


In [None]:
# %%time
# # Стройки
# ecology_stroi = load_dataset('ecology_stroi')
# ecology_stroi['datesSet'] = ecology_stroi.apply(lambda x: dates_set(x['start'], x['end']), axis=1)
# ecology_stroi['geoList'] = ecology_stroi.apply(lambda x: geo_list(x['stroi']), axis=1)
# save_dataset('ecology_stroi')
# exams_lite['stroi'] = exams_lite.apply(lambda x: km_min(x, ecology_stroi), axis=1)

In [None]:
# %%time
# # Дороги
# ecology_roads = load_dataset('ecology_roads')
# ecology_roads['datesSet'] = ecology_roads.apply(lambda x: dates_set(x['start'], x['end']), axis=1)
# ecology_roads['geoList'] = ecology_roads.apply(lambda x: geo_list(x['roads']), axis=1)
# exams_lite['roads'] = exams_lite.apply(lambda x: km_min(x, ecology_roads), axis=1)

In [7]:
%%time
for i in ecology_list:
    print(f'Обработка {i}')
    with open(PKL+i+'.pkl', 'rb') as file:
        data = pickle.load(file)
    exams_lite[i.replace('ecology_', '')] = exams_lite.apply(lambda x: km_min(x, data), axis=1)

Обработка ecology_air
Обработка ecology_autostations
Обработка ecology_bad_gasstations
Обработка ecology_capital
Обработка ecology_city_places
Обработка ecology_clinics
Обработка ecology_dirt
Обработка ecology_factories
Обработка ecology_gasstations
Обработка ecology_house_lines
Обработка ecology_markets
Обработка ecology_mean
Обработка ecology_noise
Обработка ecology_parks
Обработка ecology_roads
Обработка ecology_streets
Обработка ecology_stroi
Обработка ecology_transports
CPU times: user 2h 58min 58s, sys: 2.16 s, total: 2h 59min
Wall time: 2h 59min 34s


In [8]:
# Distance scores for all ecological factors have been computed; preview the result
# exams_lite['stroi'] = exams_lite.apply(lambda x: km_min(x, ecology_stroi), axis=1)
exams_lite.head(n=5)

Unnamed: 0,global_id,start,end,stupid,geoData,datesSet,geoList,air,autostations,bad_gasstations,capital,city_places,clinics,dirt,factories,gasstations,house_lines,markets,mean,noise,parks,roads,streets,stroi,transports
0,797946035,2017-03-01T00:00:00,2017-09-30T00:00:00,3.8,"{'type': 'Point', 'coordinates': [37.723652, 55.814547]}","{2017-07-05 00:00:00, 2017-03-25 00:00:00, 2017-06-16 00:00:00, 2017-09-11 00:00:00, 2017-08-12 00:00:00, 2017-04-25 00:00:00, 2017-08-23 00:00:00, 2017-06-07 00:00:00, 2017-06-08 00:00:00, 2017-09-26 00:00:00, 2017-03-07 00:00:00, 2017-09-09 00:00:00, 2017-08-17 00:00:00, 2017-03-23 00:00:00, 2017-05-12 00:00:00, 2017-06-05 00:00:00, 2017-08-29 00:00:00, 2017-05-24 00:00:00, 2017-07-02 00:00:00, 2017-07-19 00:00:00, 2017-09-01 00:00:00, 2017-08-30 00:00:00, 2017-08-01 00:00:00, 2017-08-16 00:00:00, 2017-04-14 00:00:00, 2017-06-14 00:00:00, 2017-07-08 00:00:00, 2017-06-19 00:00:00, 2017-03-06 00:00:00, 2017-09-22 00:00:00, 2017-09-18 00:00:00, 2017-08-04 00:00:00, 2017-03-05 00:00:00, 2017-09-06 00:00:00, 2017-06-21 00:00:00, 2017-04-27 00:00:00, 2017-05-15 00:00:00, 2017-04-18 00:00:00, 2017-03-30 00:00:00, 2017-07-28 00:00:00, 2017-09-07 00:00:00, 2017-04-30 00:00:00, 2017-09-12 00:00:00, 2017-04-09 00:00:00, 2017-06-29 00:00:00, 2017-06-22 00:00:00, 2017-09-27 00:00:00, 2017-09-30 00:00:00, 2017-04-06 00:00:00, 2017-04-19 00:00:00, 2017-06-10 00:00:00, 2017-04-24 00:00:00, 2017-06-17 00:00:00, 2017-05-13 00:00:00, 2017-04-21 00:00:00, 2017-08-26 00:00:00, 2017-05-26 00:00:00, 2017-06-09 00:00:00, 2017-09-16 00:00:00, 2017-05-03 00:00:00, 2017-05-25 00:00:00, 2017-08-24 00:00:00, 2017-09-23 00:00:00, 2017-03-02 00:00:00, 2017-07-23 00:00:00, 2017-06-30 00:00:00, 2017-05-02 00:00:00, 2017-06-24 00:00:00, 2017-04-13 00:00:00, 2017-08-08 00:00:00, 2017-03-03 00:00:00, 2017-09-15 00:00:00, 2017-06-27 00:00:00, 2017-08-03 00:00:00, 2017-08-07 00:00:00, 2017-09-04 00:00:00, 2017-09-25 00:00:00, 2017-09-29 00:00:00, 2017-04-04 00:00:00, 2017-04-02 00:00:00, 2017-04-07 00:00:00, 2017-03-09 00:00:00, 2017-06-02 00:00:00, 2017-06-12 00:00:00, 2017-04-08 00:00:00, 2017-07-18 00:00:00, 2017-07-24 00:00:00, 2017-08-15 00:00:00, 2017-03-15 00:00:00, 2017-07-07 
00:00:00, 2017-03-08 00:00:00, 2017-08-05 00:00:00, 2017-05-28 00:00:00, 2017-04-23 00:00:00, 2017-03-28 00:00:00, 2017-03-11 00:00:00, 2017-03-19 00:00:00, 2017-06-13 00:00:00, 2017-03-21 00:00:00, 2017-07-01 00:00:00, ...}","[37.723652, 55.814547]",0.0,0.0,0.0,0.0,0.792338,0.0,0.0,0.011392,0.11307,0.0,0.0,0.0,0.137005,0.0,0.0,0.0,0.41529,0.0
1,797949150,2017-03-01T00:00:00,2017-09-30T00:00:00,3.357143,"{'coordinates': [[[[37.710083127, 55.825085846], [37.709884644, 55.825088859], [37.70991683, 55.825557425], [37.710107267, 55.825552906], [37.710083127, 55.825085846]]], [[[37.710971466, 55.824666254], [37.710390321, 55.824675741], [37.710401249, 55.824826407], [37.710980407, 55.824816363], [37.710971466, 55.824666254]]], [[[37.710757032, 55.82506651], [37.710557953, 55.825070528], [37.710585371, 55.825566047], [37.710780874, 55.825561359], [37.710757032, 55.82506651]]], [[[37.707049481, 55.825154524], [37.707242202, 55.825149502], [37.707241209, 55.825030085], [37.70704054, 55.82503176], [37.707049481, 55.825154524]]], [[[37.710930347, 55.825726055], [37.711380243, 55.825718823], [37.711373091, 55.825557312], [37.711369514, 55.825539635], [37.710924625, 55.825550081], [37.710924625, 55.825570973], [37.710930347, 55.825726055]]], [[[37.583026114, 55.832647996], [37.582666499, 55.832611174], [37.582629743, 55.832706019], [37.582630736, 55.832758463], [37.58344732, 55.83284717], [37.583486063, 55.832741167], [37.583481096, 55.83270044], [37.583129428, 55.832659155], [37.583156251, 55.832582163], [37.584034426, 55.832677566], [37.584082109, 55.83255092], [37.584084096, 55.832518561], [37.583018166, 55.832398051], [37.582968496, 55.832531393], [37.582976443, 55.832562078], [37.583054923, 55.832573236], [37.583026114, 55.832647996]]], [[[37.709097862, 55.825586152], [37.70911789, 55.825928458], [37.70928669, 55.825930065], [37.709272385, 55.825774179], [37.70981884, 55.825766144], [37.709795952, 55.825566867], [37.709097862, 55.825586152]]]], 'type': 'MultiPolygon'}","{2017-07-05 00:00:00, 2017-03-25 00:00:00, 2017-06-16 00:00:00, 2017-09-11 00:00:00, 2017-08-12 00:00:00, 2017-04-25 00:00:00, 2017-08-23 00:00:00, 2017-06-07 00:00:00, 2017-06-08 00:00:00, 2017-09-26 00:00:00, 2017-03-07 00:00:00, 2017-09-09 00:00:00, 2017-08-17 00:00:00, 2017-03-23 00:00:00, 2017-05-12 00:00:00, 2017-06-05 00:00:00, 2017-08-29 
00:00:00, 2017-05-24 00:00:00, 2017-07-02 00:00:00, 2017-07-19 00:00:00, 2017-09-01 00:00:00, 2017-08-30 00:00:00, 2017-08-01 00:00:00, 2017-08-16 00:00:00, 2017-04-14 00:00:00, 2017-06-14 00:00:00, 2017-07-08 00:00:00, 2017-06-19 00:00:00, 2017-03-06 00:00:00, 2017-09-22 00:00:00, 2017-09-18 00:00:00, 2017-08-04 00:00:00, 2017-03-05 00:00:00, 2017-09-06 00:00:00, 2017-06-21 00:00:00, 2017-04-27 00:00:00, 2017-05-15 00:00:00, 2017-04-18 00:00:00, 2017-03-30 00:00:00, 2017-07-28 00:00:00, 2017-09-07 00:00:00, 2017-04-30 00:00:00, 2017-09-12 00:00:00, 2017-04-09 00:00:00, 2017-06-29 00:00:00, 2017-06-22 00:00:00, 2017-09-27 00:00:00, 2017-09-30 00:00:00, 2017-04-06 00:00:00, 2017-04-19 00:00:00, 2017-06-10 00:00:00, 2017-04-24 00:00:00, 2017-06-17 00:00:00, 2017-05-13 00:00:00, 2017-04-21 00:00:00, 2017-08-26 00:00:00, 2017-05-26 00:00:00, 2017-06-09 00:00:00, 2017-09-16 00:00:00, 2017-05-03 00:00:00, 2017-05-25 00:00:00, 2017-08-24 00:00:00, 2017-09-23 00:00:00, 2017-03-02 00:00:00, 2017-07-23 00:00:00, 2017-06-30 00:00:00, 2017-05-02 00:00:00, 2017-06-24 00:00:00, 2017-04-13 00:00:00, 2017-08-08 00:00:00, 2017-03-03 00:00:00, 2017-09-15 00:00:00, 2017-06-27 00:00:00, 2017-08-03 00:00:00, 2017-08-07 00:00:00, 2017-09-04 00:00:00, 2017-09-25 00:00:00, 2017-09-29 00:00:00, 2017-04-04 00:00:00, 2017-04-02 00:00:00, 2017-04-07 00:00:00, 2017-03-09 00:00:00, 2017-06-02 00:00:00, 2017-06-12 00:00:00, 2017-04-08 00:00:00, 2017-07-18 00:00:00, 2017-07-24 00:00:00, 2017-08-15 00:00:00, 2017-03-15 00:00:00, 2017-07-07 00:00:00, 2017-03-08 00:00:00, 2017-08-05 00:00:00, 2017-05-28 00:00:00, 2017-04-23 00:00:00, 2017-03-28 00:00:00, 2017-03-11 00:00:00, 2017-03-19 00:00:00, 2017-06-13 00:00:00, 2017-03-21 00:00:00, 2017-07-01 00:00:00, ...}","[37.710083127, 55.825085846]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.251141,0.0,0.0,0.0,0.0,0.160661,0.0,0.0,0.0,0.423029,0.0
2,797949275,2017-03-01T00:00:00,2017-09-30T00:00:00,1.1,"{'coordinates': [[[[37.807984129, 55.726758653], [37.808307186, 55.726841896], [37.808526531, 55.726567328], [37.808196321, 55.726480727], [37.807984129, 55.726735157], [37.807984129, 55.726758653]]], [[[37.808976173, 55.726681066], [37.808727265, 55.726995241], [37.808704376, 55.727030686], [37.808858872, 55.727067742], [37.808921814, 55.726979129], [37.808944702, 55.72695174], [37.80899334, 55.726892127], [37.809216499, 55.726948517], [37.80932808, 55.726974296], [37.809522629, 55.72702263], [37.809602738, 55.726898572], [37.809597015, 55.72684057], [37.808976173, 55.726681066]]], [[[37.809706703, 55.726866063], [37.809706703, 55.726887545], [37.809673324, 55.72692044], [37.809673324, 55.726957362], [37.809852138, 55.727005025], [37.810194269, 55.726581425], [37.810197845, 55.726525035], [37.810020223, 55.7264767], [37.809706703, 55.726866063]]], [[[37.808979526, 55.726210184], [37.808632627, 55.726643858], [37.808636203, 55.726692864], [37.808812633, 55.726735828], [37.809155956, 55.726312897], [37.809153572, 55.726253149], [37.808979526, 55.726210184]]]], 'type': 'MultiPolygon'}","{2017-07-05 00:00:00, 2017-03-25 00:00:00, 2017-06-16 00:00:00, 2017-09-11 00:00:00, 2017-08-12 00:00:00, 2017-04-25 00:00:00, 2017-08-23 00:00:00, 2017-06-07 00:00:00, 2017-06-08 00:00:00, 2017-09-26 00:00:00, 2017-03-07 00:00:00, 2017-09-09 00:00:00, 2017-08-17 00:00:00, 2017-03-23 00:00:00, 2017-05-12 00:00:00, 2017-06-05 00:00:00, 2017-08-29 00:00:00, 2017-05-24 00:00:00, 2017-07-02 00:00:00, 2017-07-19 00:00:00, 2017-09-01 00:00:00, 2017-08-30 00:00:00, 2017-08-01 00:00:00, 2017-08-16 00:00:00, 2017-04-14 00:00:00, 2017-06-14 00:00:00, 2017-07-08 00:00:00, 2017-06-19 00:00:00, 2017-03-06 00:00:00, 2017-09-22 00:00:00, 2017-09-18 00:00:00, 2017-08-04 00:00:00, 2017-03-05 00:00:00, 2017-09-06 00:00:00, 2017-06-21 00:00:00, 2017-04-27 00:00:00, 2017-05-15 00:00:00, 2017-04-18 00:00:00, 2017-03-30 00:00:00, 2017-07-28 00:00:00, 
2017-09-07 00:00:00, 2017-04-30 00:00:00, 2017-09-12 00:00:00, 2017-04-09 00:00:00, 2017-06-29 00:00:00, 2017-06-22 00:00:00, 2017-09-27 00:00:00, 2017-09-30 00:00:00, 2017-04-06 00:00:00, 2017-04-19 00:00:00, 2017-06-10 00:00:00, 2017-04-24 00:00:00, 2017-06-17 00:00:00, 2017-05-13 00:00:00, 2017-04-21 00:00:00, 2017-08-26 00:00:00, 2017-05-26 00:00:00, 2017-06-09 00:00:00, 2017-09-16 00:00:00, 2017-05-03 00:00:00, 2017-05-25 00:00:00, 2017-08-24 00:00:00, 2017-09-23 00:00:00, 2017-03-02 00:00:00, 2017-07-23 00:00:00, 2017-06-30 00:00:00, 2017-05-02 00:00:00, 2017-06-24 00:00:00, 2017-04-13 00:00:00, 2017-08-08 00:00:00, 2017-03-03 00:00:00, 2017-09-15 00:00:00, 2017-06-27 00:00:00, 2017-08-03 00:00:00, 2017-08-07 00:00:00, 2017-09-04 00:00:00, 2017-09-25 00:00:00, 2017-09-29 00:00:00, 2017-04-04 00:00:00, 2017-04-02 00:00:00, 2017-04-07 00:00:00, 2017-03-09 00:00:00, 2017-06-02 00:00:00, 2017-06-12 00:00:00, 2017-04-08 00:00:00, 2017-07-18 00:00:00, 2017-07-24 00:00:00, 2017-08-15 00:00:00, 2017-03-15 00:00:00, 2017-07-07 00:00:00, 2017-03-08 00:00:00, 2017-08-05 00:00:00, 2017-05-28 00:00:00, 2017-04-23 00:00:00, 2017-03-28 00:00:00, 2017-03-11 00:00:00, 2017-03-19 00:00:00, 2017-06-13 00:00:00, 2017-03-21 00:00:00, 2017-07-01 00:00:00, ...}","[37.807984129, 55.726758653]",0.0,0.0,0.0,0.284173,0.577806,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.230449,0.0,0.0,0.0,0.059235,0.0
3,797949425,2017-03-01T00:00:00,2017-09-30T00:00:00,6.6,"{'type': 'Point', 'coordinates': [37.786873, 55.762276]}","{2017-07-05 00:00:00, 2017-03-25 00:00:00, 2017-06-16 00:00:00, 2017-09-11 00:00:00, 2017-08-12 00:00:00, 2017-04-25 00:00:00, 2017-08-23 00:00:00, 2017-06-07 00:00:00, 2017-06-08 00:00:00, 2017-09-26 00:00:00, 2017-03-07 00:00:00, 2017-09-09 00:00:00, 2017-08-17 00:00:00, 2017-03-23 00:00:00, 2017-05-12 00:00:00, 2017-06-05 00:00:00, 2017-08-29 00:00:00, 2017-05-24 00:00:00, 2017-07-02 00:00:00, 2017-07-19 00:00:00, 2017-09-01 00:00:00, 2017-08-30 00:00:00, 2017-08-01 00:00:00, 2017-08-16 00:00:00, 2017-04-14 00:00:00, 2017-06-14 00:00:00, 2017-07-08 00:00:00, 2017-06-19 00:00:00, 2017-03-06 00:00:00, 2017-09-22 00:00:00, 2017-09-18 00:00:00, 2017-08-04 00:00:00, 2017-03-05 00:00:00, 2017-09-06 00:00:00, 2017-06-21 00:00:00, 2017-04-27 00:00:00, 2017-05-15 00:00:00, 2017-04-18 00:00:00, 2017-03-30 00:00:00, 2017-07-28 00:00:00, 2017-09-07 00:00:00, 2017-04-30 00:00:00, 2017-09-12 00:00:00, 2017-04-09 00:00:00, 2017-06-29 00:00:00, 2017-06-22 00:00:00, 2017-09-27 00:00:00, 2017-09-30 00:00:00, 2017-04-06 00:00:00, 2017-04-19 00:00:00, 2017-06-10 00:00:00, 2017-04-24 00:00:00, 2017-06-17 00:00:00, 2017-05-13 00:00:00, 2017-04-21 00:00:00, 2017-08-26 00:00:00, 2017-05-26 00:00:00, 2017-06-09 00:00:00, 2017-09-16 00:00:00, 2017-05-03 00:00:00, 2017-05-25 00:00:00, 2017-08-24 00:00:00, 2017-09-23 00:00:00, 2017-03-02 00:00:00, 2017-07-23 00:00:00, 2017-06-30 00:00:00, 2017-05-02 00:00:00, 2017-06-24 00:00:00, 2017-04-13 00:00:00, 2017-08-08 00:00:00, 2017-03-03 00:00:00, 2017-09-15 00:00:00, 2017-06-27 00:00:00, 2017-08-03 00:00:00, 2017-08-07 00:00:00, 2017-09-04 00:00:00, 2017-09-25 00:00:00, 2017-09-29 00:00:00, 2017-04-04 00:00:00, 2017-04-02 00:00:00, 2017-04-07 00:00:00, 2017-03-09 00:00:00, 2017-06-02 00:00:00, 2017-06-12 00:00:00, 2017-04-08 00:00:00, 2017-07-18 00:00:00, 2017-07-24 00:00:00, 2017-08-15 00:00:00, 2017-03-15 00:00:00, 2017-07-07 
00:00:00, 2017-03-08 00:00:00, 2017-08-05 00:00:00, 2017-05-28 00:00:00, 2017-04-23 00:00:00, 2017-03-28 00:00:00, 2017-03-11 00:00:00, 2017-03-19 00:00:00, 2017-06-13 00:00:00, 2017-03-21 00:00:00, 2017-07-01 00:00:00, ...}","[37.786873, 55.762276]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.476229,0.0,0.0,0.0,0.368473,0.0
4,797949570,2017-03-01T00:00:00,2017-09-30T00:00:00,5.0,"{'type': 'Point', 'coordinates': [37.775102, 55.806463]}","{2017-07-05 00:00:00, 2017-03-25 00:00:00, 2017-06-16 00:00:00, 2017-09-11 00:00:00, 2017-08-12 00:00:00, 2017-04-25 00:00:00, 2017-08-23 00:00:00, 2017-06-07 00:00:00, 2017-06-08 00:00:00, 2017-09-26 00:00:00, 2017-03-07 00:00:00, 2017-09-09 00:00:00, 2017-08-17 00:00:00, 2017-03-23 00:00:00, 2017-05-12 00:00:00, 2017-06-05 00:00:00, 2017-08-29 00:00:00, 2017-05-24 00:00:00, 2017-07-02 00:00:00, 2017-07-19 00:00:00, 2017-09-01 00:00:00, 2017-08-30 00:00:00, 2017-08-01 00:00:00, 2017-08-16 00:00:00, 2017-04-14 00:00:00, 2017-06-14 00:00:00, 2017-07-08 00:00:00, 2017-06-19 00:00:00, 2017-03-06 00:00:00, 2017-09-22 00:00:00, 2017-09-18 00:00:00, 2017-08-04 00:00:00, 2017-03-05 00:00:00, 2017-09-06 00:00:00, 2017-06-21 00:00:00, 2017-04-27 00:00:00, 2017-05-15 00:00:00, 2017-04-18 00:00:00, 2017-03-30 00:00:00, 2017-07-28 00:00:00, 2017-09-07 00:00:00, 2017-04-30 00:00:00, 2017-09-12 00:00:00, 2017-04-09 00:00:00, 2017-06-29 00:00:00, 2017-06-22 00:00:00, 2017-09-27 00:00:00, 2017-09-30 00:00:00, 2017-04-06 00:00:00, 2017-04-19 00:00:00, 2017-06-10 00:00:00, 2017-04-24 00:00:00, 2017-06-17 00:00:00, 2017-05-13 00:00:00, 2017-04-21 00:00:00, 2017-08-26 00:00:00, 2017-05-26 00:00:00, 2017-06-09 00:00:00, 2017-09-16 00:00:00, 2017-05-03 00:00:00, 2017-05-25 00:00:00, 2017-08-24 00:00:00, 2017-09-23 00:00:00, 2017-03-02 00:00:00, 2017-07-23 00:00:00, 2017-06-30 00:00:00, 2017-05-02 00:00:00, 2017-06-24 00:00:00, 2017-04-13 00:00:00, 2017-08-08 00:00:00, 2017-03-03 00:00:00, 2017-09-15 00:00:00, 2017-06-27 00:00:00, 2017-08-03 00:00:00, 2017-08-07 00:00:00, 2017-09-04 00:00:00, 2017-09-25 00:00:00, 2017-09-29 00:00:00, 2017-04-04 00:00:00, 2017-04-02 00:00:00, 2017-04-07 00:00:00, 2017-03-09 00:00:00, 2017-06-02 00:00:00, 2017-06-12 00:00:00, 2017-04-08 00:00:00, 2017-07-18 00:00:00, 2017-07-24 00:00:00, 2017-08-15 00:00:00, 2017-03-15 00:00:00, 2017-07-07 
00:00:00, 2017-03-08 00:00:00, 2017-08-05 00:00:00, 2017-05-28 00:00:00, 2017-04-23 00:00:00, 2017-03-28 00:00:00, 2017-03-11 00:00:00, 2017-03-19 00:00:00, 2017-06-13 00:00:00, 2017-03-21 00:00:00, 2017-07-01 00:00:00, ...}","[37.775102, 55.806463]",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.418683,0.597498,0.0,0.0,0.0,0.406763,0.0,0.0,0.0,0.358154,0.0


In [11]:
# Keep the identifying columns, the per-factor scores and the 'stupid' column, then persist
# the selection through save_dataset (which looks the frame up by its global name).
export_columns = [
    'global_id', 'start', 'end',
    'air', 'autostations', 'bad_gasstations', 'capital', 'city_places', 'clinics',
    'dirt', 'factories', 'gasstations', 'house_lines', 'markets', 'mean', 'noise',
    'parks', 'roads', 'streets', 'stroi', 'transports',
    'stupid',
]
exams_ecology = exams_lite.loc[:, export_columns]
save_dataset('exams_ecology')

Сохранено 5542 записей в exams_ecology.gz


In [14]:
# Also export the selection as a UTF-8 CSV file without the index column
exams_ecology.to_csv(
    path_or_buf='exams_ecology.csv',
    encoding='utf-8',
    index=False,
)