# Use Case 1 - Summary of All Flights

## Initialization

In [124]:
import os, json
import pandas as pd
import pprint

## Assumptions

##### 1. Each file contains exactly one flight ID, so a single file never holds multiple flights
##### 2. All files are in JSON format and are located under one specific folder
##### 3. Normally I would use Spark to speed up the computation, but I chose a plain pandas DataFrame here because the priority is to confirm that the data can at least be extracted into a proper JSON format
##### 4. Due to the limited time available for this exercise, I capped the number of JSON files read into the DataFrame (see `max_file` in the extraction loop). Some files could not be extracted because of data-cleansing issues

## Load Dataset

In [38]:
# Folder that is assumed to hold every flight JSON file (one flight per file).
folder_path = 'C:\\Users\\dmitr\\Downloads\\scat20161015_20161021\\'
# os.listdir returns names in arbitrary order; sort them so that runs are
# reproducible, which matters whenever only a subset of the files is processed.
json_files = sorted(name for name in os.listdir(folder_path) if name.endswith('.json'))

## Extract Data Function

In [190]:
def _nested_value(cell, keys):
    """Follow *keys* through nested dicts in *cell*; return None if any level is absent."""
    for key in keys:
        if not isinstance(cell, dict) or key not in cell:
            return None
        cell = cell[key]
    return cell


def _plot_metric(df, column, keys):
    """Return a Series with the value found at *keys* inside each dict of ``df[column]``.

    Rows whose cell is not a dict, or that lack a key on the path, yield None.
    When *column* itself is absent from *df*, an all-NaN float Series is
    returned so the metric is reported as missing instead of aborting the file.
    """
    if column not in df.columns:
        return pd.Series(float("nan"), index=df.index, dtype="float64")
    return df[column].apply(lambda cell: _nested_value(cell, keys))


def extract_data(file):
    """Summarise one flight JSON file.

    Reads ``id``, ``fpl.fpl_base[0].aircraft_type`` and
    ``fpl.fpl_base[0].flight_rules`` from the document, then computes the
    maximum of four per-plot values and the time span covered by
    ``time_of_track``.

    Args:
        file: Path of the JSON file to read.

    Returns:
        A tuple ``(summary, aircraft_type, flight_rules)`` where ``summary``
        is a dict with the keys ``id``, ``plots_duration`` (formatted as
        ``"{h}h {m}m {s}s"``), ``plots_max_altitude``,
        ``plots_max_baro_vert_rate``, ``plots_max_mach`` and
        ``plots_max_measured_flight_level``. A metric whose source column is
        missing from the plots is reported as NaN.

    Raises:
        FileNotFoundError: If *file* does not exist.
        KeyError: If a required top-level key or ``time_of_track`` is missing.
        Exception: If the file contains no plots.
    """
    # The context manager closes the handle even when json.load fails; a
    # missing file now raises FileNotFoundError instead of returning None.
    with open(file, encoding="utf-8") as f:
        data = json.load(f)

    # Header data
    flight_id = data['id']
    fpl_base = data['fpl']['fpl_base'][0]
    unique_aircraft_type = fpl_base['aircraft_type']
    unique_flight_rules = fpl_base['flight_rules']

    # Item data: one row per plot
    df = pd.DataFrame(data['plots'])
    if df.shape[0] == 0:
        raise Exception("no data found, please check manually if it has correct format")

    # Output key -> (plot column, key path inside that column's dict)
    metric_paths = {
        'plots_max_altitude': ('I062/380', ('subitem7', 'altitude')),
        'plots_max_baro_vert_rate': ('I062/380', ('subitem13', 'baro_vert_rate')),
        'plots_max_mach': ('I062/380', ('subitem27', 'mach')),
        'plots_max_measured_flight_level': ('I062/136', ('measured_flight_level',)),
    }
    metrics = pd.DataFrame({
        name: _plot_metric(df, column, keys)
        for name, (column, keys) in metric_paths.items()
    })
    maxima = metrics.max().to_dict()

    # Total duration. Timedelta.seconds only holds the sub-day remainder, so
    # use total_seconds() to keep whole days in the hour count.
    track_times = pd.to_datetime(df['time_of_track'])
    total_duration = track_times.max() - track_times.min()
    elapsed = int(total_duration.total_seconds())
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    formatted_duration = f"{hours}h {minutes}m {seconds}s"

    final_result = {
        'id': flight_id,
        'plots_duration': formatted_duration,
        'plots_max_altitude': maxima['plots_max_altitude'],
        'plots_max_baro_vert_rate': maxima['plots_max_baro_vert_rate'],
        'plots_max_mach': maxima['plots_max_mach'],
        'plots_max_measured_flight_level': maxima['plots_max_measured_flight_level'],
    }

    return final_result, unique_aircraft_type, unique_flight_rules

# Smoke test: run the extractor on a single known file and show what it returns.
file_name = '100000.json'
sample_path = folder_path + file_name
flight, unique_aircraft_type, unique_flight_rules = extract_data(sample_path)
print(flight, unique_aircraft_type, unique_flight_rules)

{'id': 100000, 'plots_duration': '0h 4m 14s', 'plots_max_altitude': 33000.0, 'plots_max_baro_vert_rate': 3325.0, 'plots_max_mach': 0.728, 'plots_max_measured_flight_level': 252.0} B738 I


## Extract All JSON

In [193]:
# Run the extractor over at most max_file files. Each successful file adds its
# summary, aircraft type and flight rules to dict_result; a failing file is
# reported and skipped. counter_file counts every attempted file.
max_file = 100
dict_result = {"flights": [], "unique_aircraft_type": [], "unique_flight_rules": []}
counter_file = 0
for file_name in json_files[:max_file]:
    full_path = folder_path + file_name
    try:
        flight, unique_aircraft_type, unique_flight_rules = extract_data(full_path)
        dict_result["flights"].append(flight)
        dict_result["unique_aircraft_type"].append(unique_aircraft_type)
        dict_result["unique_flight_rules"].append(unique_flight_rules)
        print("successfully extract file: ", str(full_path))
    except Exception as e:
        print("error reading file: ", str(full_path), str(e))
    counter_file += 1

successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100000.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100001.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100002.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100003.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100004.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100005.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100006.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100007.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100008.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100009.json
successfully extract file:  C:\Users\dmitr\Downloads\scat20161015_20161021\100010.json
successfully extract file:  C:\Users\dmitr\

## Print Result

In [194]:
# Pretty-print the collected results in a narrow, compact layout.
pp = pprint.PrettyPrinter(
    compact=True,
    width=30,
    indent=2,
)
pp.pprint(dict_result)

{ 'flights': [ { 'id': 100000,
                 'plots_duration': '0h '
                                   '4m '
                                   '14s',
                 'plots_max_altitude': 33000.0,
                 'plots_max_baro_vert_rate': 3325.0,
                 'plots_max_mach': 0.728,
                 'plots_max_measured_flight_level': 252.0},
               { 'id': 100001,
                 'plots_duration': '0h '
                                   '29m '
                                   '16s',
                 'plots_max_altitude': 35000.0,
                 'plots_max_baro_vert_rate': 381.25,
                 'plots_max_mach': 0.848,
                 'plots_max_measured_flight_level': 350.0},
               { 'id': 100002,
                 'plots_duration': '0h '
                                   '43m '
                                   '40s',
                 'plots_max_altitude': 41000.0,
                 'plots_max_baro_vert_rate': 162.5,
                 'plots_max