In [176]:
import numpy as np
import pandas as pd

from os import listdir
from os.path import isfile, join

from datetime import date, time, datetime

import tqdm

In [141]:
class Hurricane:
    """Track of a single cyclone together with its name.

    ``hurr_df`` is the track table; the methods below read its ``LON``,
    ``LAT`` and ``TIME`` columns. ``name`` is the label returned by
    :meth:`get_name`.
    """

    def __init__(self, hurr_df, name):
        self.hurr_df = hurr_df
        self.name = name

    def create_lines(self):
        """Return the track as a list of consecutive segments.

        Each segment has the form ``((lon_a, lon_b), (lat_a, lat_b))`` where
        ``a`` and ``b`` are two neighbouring rows of the track. A track with
        fewer than two points yields an empty list.
        """
        corrs = list(zip(self.hurr_df['LON'], self.hurr_df['LAT']))

        # Pair every point with its successor; zip stops one short of the end,
        # so empty and single-point tracks naturally produce no segments.
        return [((start[0], end[0]), (start[1], end[1]))
                for start, end in zip(corrs, corrs[1:])]

    def get_square(self, margin=10):
        """Return the bounding box(es) of the track as a tuple of squares.

        Every square is ``(lon_min, lat_min, lon_max, lat_max)``, grown by
        ``margin`` on each side and clamped to [-180, 180] x [-90, 90].

        The result is always a tuple of squares so callers can iterate over
        it: one square for an ordinary track, two squares (one touching -180,
        one touching 180) when the track crosses the antimeridian.
        """
        lats = self.hurr_df['LAT']
        lons = self.hurr_df['LON']

        lat_low = max(-90., lats.min() - margin)
        lat_high = min(90., lats.max() + margin)

        west = lons[lons <= 0.]
        east = lons[lons > 0.]

        # The track wraps around +/-180 only if both hemispheres are present
        # and the gap through 0 degrees is wider than the gap through 180.
        # (Testing for rows strictly between max(west) and min(east) cannot
        # detect this: by construction no such rows ever exist.)
        crosses_antimeridian = (not west.empty and not east.empty
                                and east.min() - west.max() > 180.)

        if crosses_antimeridian:
            first_square = (-180., lat_low, west.max() + margin, lat_high)
            second_square = (east.min() - margin, lat_low, 180., lat_high)
            return (first_square, second_square)

        # Trailing comma: a one-element tuple of squares, not a flat 4-tuple.
        return ((max(-180., lons.min() - margin), lat_low,
                 min(180., lons.max() + margin), lat_high),)

    def get_time(self):
        """Return ``(earliest, latest)`` value of the ``TIME`` column."""
        begin_time = self.hurr_df['TIME'].min()
        end_time = self.hurr_df['TIME'].max()

        return (begin_time, end_time)

    def get_name(self):
        """Return the name given at construction."""
        return self.name

In [129]:
def get_coords_df(path_dir, begin_time, end_time, squares, filter_events=('Ion. corr', 'Pmax', 'Cut off', 'Retrieval')):
    """Collect observations inside a time window, a set of squares and a set of events.

    Parameters
    ----------
    path_dir : str
        Directory with one CSV per day. The part of the file name before the
        first ``.`` is split on ``_`` and the integer pieces are passed to
        ``datetime`` (followed by ``0, 0``) to obtain the file's date.
        Each CSV must provide ``Lon``, ``Lat``, ``time`` and ``Processing``
        columns; ``time`` values are strings whose part before the first
        ``:`` is the hour (minutes are discarded).
    begin_time, end_time : datetime-like
        Inclusive bounds of the window.
    squares : iterable of (lon_min, lat_min, lon_max, lat_max)
        A row is kept when it lies strictly inside at least one square.
    filter_events : iterable of str
        A row is kept when its ``Processing`` text contains at least one of
        these substrings (literal match, not a regular expression).

    Returns
    -------
    pandas.DataFrame
        Columns ``Lon``, ``Lat``, ``time``, ``Processing``. Each source row
        appears at most once, even if it matches several squares or events.
        Empty (but with those columns) when nothing matches.
    """
    columns = ['Lon', 'Lat', 'time', 'Processing']

    # File-level prefilter works on whole days: a file stamped at midnight
    # must still be read when the window opens later on that same day.
    first_day = pd.Timestamp(begin_time).normalize()
    last_moment = pd.Timestamp(end_time)

    frames = []
    for path_file in listdir(path_dir):
        full_path_file = join(path_dir, path_file)

        if not isfile(full_path_file):
            continue

        date_obsr = datetime(*list(map(int, path_file.split('.')[0].split('_'))), 0, 0)

        if (date_obsr < first_day) or (last_moment < date_obsr):
            continue

        obsr_df = pd.read_csv(full_path_file)
        obsr_df['time'] = obsr_df['time'].map(lambda x: datetime.combine(date_obsr, time(int(x.split(':')[0]), 0)))

        # Exact, row-level time filter.
        in_window = (obsr_df['time'] >= begin_time) & (obsr_df['time'] <= end_time)

        # Union of all squares; OR-ing masks keeps each row once even when
        # squares overlap. Comparisons against a NaN bound are simply False.
        in_square = pd.Series(False, index=obsr_df.index)
        for lon_min, lat_min, lon_max, lat_max in squares:
            in_square |= ((obsr_df['Lon'] > lon_min) & (obsr_df['Lon'] < lon_max) &
                          (obsr_df['Lat'] > lat_min) & (obsr_df['Lat'] < lat_max))

        # Union of all events; literal substring match, missing text never matches.
        has_event = pd.Series(False, index=obsr_df.index)
        for event in filter_events:
            has_event |= obsr_df['Processing'].str.contains(event, regex=False, na=False)

        selected = obsr_df.loc[in_window & in_square & has_event, columns]
        if not selected.empty:
            frames.append(selected)

    if not frames:
        return pd.DataFrame(columns=columns)

    # Single concatenation instead of growing a frame inside the loop
    # (DataFrame.append no longer exists in pandas >= 2.0).
    return pd.concat(frames)
            

In [182]:
path_dir = '../data/Result_py/Hurr/Cyclon/'
# listdir() yields names in arbitrary order; sorting keeps the order of the
# hurricanes (and therefore of the rows in the final table) reproducible.
files = sorted(join(path_dir, f) for f in listdir(path_dir) if isfile(join(path_dir, f)))

In [183]:
# Build one Hurricane per track file; a file whose TIME column cannot be
# parsed is reported and skipped.
hurricanes = []

for path_file in files:
    hurr_df = pd.read_csv(path_file)

    try:
        # TIME strings are parsed as month/day/hour followed by "Z"; the year
        # 2014 is prepended because the format string expects one.
        hurr_df['TIME'] = hurr_df['TIME'].map(lambda x: datetime.strptime("2014/" + x, "%Y/%m/%d/%HZ"))
    except ValueError:
        # The message reads "Wrong date" in Russian.
        print ("Неправильная дата")
        continue

    # Hurricane name = file name without directory, cut at the first dot.
    hurricanes.append(Hurricane(hurr_df, path_file.rsplit("/", 1)[-1].partition(".")[0]))
        

In [187]:
path_coord_files = '..//data//Result_py//Coords//'

# Event labels searched for in the ``Processing`` column, in output column order.
events = ('Ion. corr', 'Pmax', 'Cut off')

result_columns = ['Name',
                  'Ion. corr before', 'Pmax before', 'CutOff before',
                  'Ion. corr in time', 'Pmax in time', 'CutOff in time',
                  'Ion. corr after', 'Pmax after', 'CutOff after']

rows = []
for hurr in tqdm.tqdm(hurricanes):
    begin_time, end_time = hurr.get_time()
    diff_time = end_time - begin_time

    # Three windows of equal length: before, during and after the hurricane.
    # NOTE(review): neighbouring windows share their boundary instant, so an
    # observation exactly at begin_time/end_time is counted in both.
    windows = ((begin_time - diff_time, begin_time),
               (begin_time, end_time),
               (end_time, end_time + diff_time))

    squares = hurr.get_square()

    row = [hurr.get_name()]
    for window_start, window_end in windows:
        coords = get_coords_df(path_coord_files, window_start, window_end, squares)
        total = coords.shape[0]

        for event in events:
            if total == 0:
                # No observations in this window: the share is undefined,
                # record NaN instead of dividing by zero.
                row.append(np.nan)
            else:
                matched = coords['Processing'].str.contains(event, regex=False, na=False).sum()
                row.append(matched / total)

    rows.append(row)

# Build the table once from the collected rows instead of growing it row by row.
result_df = pd.DataFrame(rows, columns=result_columns)







  0%|          | 0/100 [00:00<?, ?it/s][A[A[A[A[A[A





  1%|          | 1/100 [00:00<01:29,  1.11it/s][A[A[A[A[A[A





  2%|▏         | 2/100 [00:01<01:20,  1.22it/s][A[A[A[A[A[A





  3%|▎         | 3/100 [00:02<01:14,  1.30it/s][A[A[A[A[A[A





  4%|▍         | 4/100 [00:02<01:06,  1.44it/s][A[A[A[A[A[A





  5%|▌         | 5/100 [00:03<00:59,  1.60it/s][A[A[A[A[A[A





  6%|▌         | 6/100 [00:03<00:58,  1.61it/s][A[A[A[A[A[A





  7%|▋         | 7/100 [00:04<00:56,  1.64it/s][A[A[A[A[A[A





  8%|▊         | 8/100 [00:04<00:55,  1.65it/s][A[A[A[A[A[A





  9%|▉         | 9/100 [00:05<00:54,  1.68it/s][A[A[A[A[A[A





 10%|█         | 10/100 [00:05<00:52,  1.73it/s][A[A[A[A[A[A





 11%|█         | 11/100 [00:06<00:52,  1.71it/s][A[A[A[A[A[A





 12%|█▏        | 12/100 [00:06<00:50,  1.75it/s][A[A[A[A[A[A





 13%|█▎        | 13/100 [00:07<00:49,  1.76it/s][A[A[A[A[A[A





 14%

In [189]:
# Persist the per-hurricane event shares (row index is written as the first column).
result_df.to_csv(path_or_buf="../data/cyclon_table.csv")