# Exploring opensky states data

In [1]:
import math
import pandas as pd
import numpy as np
from scipy.stats import stats, norm, skew
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, ElasticNet, Lasso
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.utils.np_utils import to_categorical
from keras.wrappers.scikit_learn import KerasRegressor
from scipy.special import boxcox1p
import lightgbm as lgb
import xgboost as xgb

%matplotlib inline
np.random.seed(2)

Using TensorFlow backend.


In [2]:
dataset0 = pd.read_csv('../input/states_2019-01-14-00.csv')
print('dataset0 loaded')
dataset1 = pd.read_csv('../input/states_2019-01-14-01.csv')
print('dataset1 loaded')
#dataset2 = pd.read_csv('../input/states_2019-01-14-02.csv')
#print('dataset2 loaded')
#dataset3 = pd.read_csv('../input/states_2019-01-14-03.csv')
#print('dataset3 loaded')
#dataset4 = pd.read_csv('../input/states_2019-01-14-04.csv')
#print('dataset4 loaded')
#dataset5 = pd.read_csv('../input/states_2019-01-14-05.csv')
#print('dataset5 loaded')
#dataset = pd.concat([dataset0, dataset1, dataset2, dataset3, dataset4, dataset5], axis=0)
dataset = pd.concat([dataset0, dataset1], axis=0)

dataset0 loaded
dataset1 loaded


In [3]:
dataset[0:5]


Unnamed: 0,time,icao24,lat,lon,velocity,heading,vertrate,callsign,onground,alert,spi,squawk,baroaltitude,geoaltitude,lastposupdate,lastcontact
0,1547424000,4ca1fe,51.671308,2.311249,127.868055,288.289158,-4.8768,RYR4WA,False,False,False,3117.0,7757.16,7620.0,1547424000.0,1547424000.0
1,1547424000,4b187d,40.537491,-73.051758,204.903762,76.940691,10.07872,SWR17P,False,False,False,1547.0,4099.56,4091.94,1547424000.0,1547424000.0
2,1547424000,a776b2,41.119446,-71.720581,198.12774,246.430252,3.90144,JBU321,False,False,False,2076.0,10195.56,10088.88,1547424000.0,1547424000.0
3,1547424000,40762c,51.964233,8.027344,227.736637,45.732175,0.0,NPT4069,False,False,False,7130.0,11277.6,10904.22,1547424000.0,1547424000.0
4,1547424000,a278f3,40.795402,-73.304714,222.625901,48.278499,0.0,CNS818,False,False,False,3055.0,7010.4,6964.68,1547424000.0,1547424000.0


## Dropping useless columns

In [4]:
dataset = dataset.drop('squawk', axis=1)
dataset = dataset.drop('time', axis=1)
dataset = dataset.drop('lastcontact', axis=1)
dataset = dataset.drop('spi', axis=1)

In [5]:
dataset.isnull().sum()

icao24                 0
lat               837627
lon               837627
velocity          801030
heading           801030
vertrate          786889
callsign          722680
onground               0
alert                  0
baroaltitude      313930
geoaltitude      1080490
lastposupdate     837627
dtype: int64

## Dropping useless rows

In [6]:
dataset = dataset.dropna(axis=0, subset=['lat', 'lon'])
dataset = dataset.dropna(axis=0, subset=['callsign'])
dataset.isnull().sum()

icao24                0
lat                   0
lon                   0
velocity          69209
heading           69209
vertrate          68536
callsign              0
onground              0
alert                 0
baroaltitude      85222
geoaltitude      207463
lastposupdate         0
dtype: int64

## Filling barometric altitude holes with geoaltitudes and removing the remaninig nulls

In [7]:
dataset['altitude'] = dataset['baroaltitude']
dataset['altitude'] = dataset['altitude'].fillna(dataset['geoaltitude'])
dataset = dataset.dropna(axis=0, subset=['altitude'])
dataset = dataset.drop('geoaltitude', axis=1)
dataset = dataset.drop('baroaltitude', axis=1)
dataset.isnull().sum()

icao24               0
lat                  0
lon                  0
velocity         16820
heading          16820
vertrate         16181
callsign             0
onground             0
alert                0
lastposupdate        0
altitude             0
dtype: int64

## Data cleaned

The remaining nulls could be filled via interpolation once the dataset is splitted over the `icao24` field values

## Plotting some flights

In [12]:
flight_paths = []

groups = dataset.groupby('icao24')
print(len(groups.groups), 'groups')
max_num = 1
num_groups = len(groups)
for k, i in groups:
    num_groups -= 1
    if num_groups % 100 == 0:
        print(num_groups, 'remaining')
    if i['onground'].where(i['onground'] == True).count() > 0:
        # loops over all the points sent by the aircraft ands to flight_paths
        # all the sequences of points delimited by two landings: onground=True
        last_onground_index = -1
        cur_flight = pd.DataFrame(columns = ['lat', 'lon'])
        print('removing cur_flight with shape', cur_flight.shape)
        for index, el in i.iterrows():
            if not el['onground']:
                if last_onground_index == -1:
                    continue # flight never started
                else:
                    cur_flight = cur_flight.append( el[['lat', 'lon']] )
            else: # ...point is on ground
                if last_onground_index == -1:
                    cur_flight = cur_flight.append( el[['lat', 'lon']] )
                    last_onground_index = index
                else:
                    cur_flight = cur_flight.append( el[['lat', 'lon']] )
                    if cur_flight.shape[0] > 1000:
                        flight_paths.append(cur_flight.values.tolist())
                        print('Flight discovered!', cur_flight.shape, 'points')
                        cur_flight = pd.DataFrame(columns = ['lat', 'lon'])
                    
    if len(flight_paths) >= max_num:
        break
    else:
        continue

print('Found ', len(flight_paths), ' paths')

flight_paths 

10243 groups
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
10200 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
10100 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
remov

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
8700 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
8600 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight 

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
7000 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
6000 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
5000 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
4900 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight 

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
4000 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
3200 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
2400 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
1400 remaining
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 

removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur_flight with shape (0, 2)
removing cur

[]

In [13]:
from ipyleaflet import Map, Polyline

print(len(flight_paths))

av_lat, av_lon = 38.94551467895508, -77.46537581734037
print(av_lat, av_lon)

line = Polyline(
    locations = [flight_paths],
    color = "green",
    fill=False,
    weight=2)
m = Map(center = (av_lat, av_lon), zoom = 3)
m.add_layer(line)
m.layout.width = '100%'
m.layout.height = '800px'
m

0
38.94551467895508 -77.46537581734037


Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …