Licensed under the Apache License, Version 2.0

# Copy the dataset file to Colab local storage

In [None]:
# Authenticate the current Colab user; presumably this is what grants the
# gcloud/gsutil commands below access to the project's buckets.
from google.colab import auth
auth.authenticate_user()
# Project whose buckets hold the dataset.
project_id = 'contrails-predictions-external'
# {project_id} is interpolated by IPython before the shell command runs.
!gcloud config set project {project_id}
# List the buckets visible in the project as a quick check that access works.
!gsutil ls

Updated property [core/project].
gs://contrails_measurement_paper_data/
gs://contrails_predictions_muac/


In [None]:
# Download the gzip-compressed dataset from the bucket into the Colab VM's /content directory.
!gsutil cp gs://contrails_measurement_paper_data/dataset.parquet.gzip /content

Copying gs://contrails_measurement_paper_data/dataset.parquet.gzip...
/ [0 files][    0.0 B/248.5 MiB]                                                ==> NOTE: You are downloading one or more large file(s), which would
run significantly faster if you enabled sliced object downloads. This
feature is enabled by default but requires that compiled crcmod be
installed (see "gsutil help crcmod").

/ [1 files][248.5 MiB/248.5 MiB]                                                
Operation completed over 1 objects/248.5 MiB.                                    


In [None]:
!gunzip --suffix=.gzip /content/dataset.parquet.gzip

# Open data

In [None]:
import pandas as pd
import numpy as np
# Load the decompressed Parquet file produced by the gunzip step into a DataFrame.
df = pd.read_parquet('/content/dataset.parquet')

In [None]:
# Display the loaded frame; the notebook renders a truncated view (first and last rows).
df

Unnamed: 0,latitude,longitude,altitude_m,timestamp,aircraft_type,match
0,42.459179,-85.621165,10086.536109,1.568199e+09,B738,False
1,42.461779,-85.790222,10481.250213,1.568199e+09,B738,False
2,42.464045,-85.948090,10820.163119,1.568199e+09,B738,False
3,42.464710,-86.117779,10964.867410,1.568200e+09,B738,False
4,42.450269,-86.253707,10964.867410,1.568200e+09,B738,False
...,...,...,...,...,...,...
5,37.972475,-109.322365,10660.226731,1.574108e+09,B739,False
6,38.017960,-109.203796,10660.226731,1.574108e+09,B739,False
7,38.062994,-109.085819,10660.226731,1.574108e+09,B739,False
8,38.124279,-108.924713,10660.226731,1.574108e+09,B739,False


In [None]:
# Toy baseline: predict a contrail for every row whose altitude lies strictly
# between 11 km and 12 km.
# This is intentionally crude. A realistic model would look up the weather at each
# latitude/longitude/altitude/timestamp and base its prediction on that.
predictions = df["altitude_m"].gt(11000) & df["altitude_m"].lt(12000)

In [None]:
# Boolean masks for each outcome, comparing the predicted flags with the
# observed `match` column.
observed = df["match"]
true_positives = predictions & observed
false_positives = predictions & ~observed
false_negatives = ~predictions & observed

# Count each outcome once and reuse the counts in both ratios.
tp_count = true_positives.sum()
fp_count = false_positives.sum()
fn_count = false_negatives.sum()

precision = tp_count / (tp_count + fp_count)
recall = tp_count / (tp_count + fn_count)

# Report both scores. The altitude-only baseline scores poorly; a weather-based
# model might do better.
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")


Precision: 0.04
Recall: 0.31
