In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision import transforms
import os
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import pandas as pd

In [2]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)
dev_no = torch.cuda.current_device()
print(dev_no)
dev_name = torch.cuda.get_device_name()
print(dev_name)

cuda
0
NVIDIA GeForce GTX 1650 Ti


In [3]:
def aggregatePressureLevels(dataset):
    p_levels = dataset['pressure_levels'].to_numpy()
    outer =[]
    for i in range(p_levels.shape[0]):
        inner = []
        for s in p_levels[i].split(' '):
            if s[0]=='[':
                s = s[1:]
            if s[-1]=='\n' or s[-1]==']':
                s = s[:-1]
            inner.append(float(s))
        outer.append(inner)
    p_levels = np.array(outer)
    # take average of all pressure levels
    p_levels = p_levels.mean(axis=1)
    return p_levels

In [17]:
oco_path = './data_2016_oco2/csv_folder/'
oco_jan_train = pd.read_csv(f'{oco_path}oco2_LtCO2_160101_B10206Ar_200730102710s.csv')
oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160102_B10206Ar_200730102859s.csv')])
oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160103_B10206Ar_200730102932s.csv')])
# oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160104_B10206Ar_200730102937s.csv')])
# oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160105_B10206Ar_200730103055s.csv')])
# oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160106_B10206Ar_200730103123s.csv')])

# FOr GOSAT
gosat_path = './gosat/2016/csv_folder/'
gosat_jan_train = pd.read_csv(f'{gosat_path}acos_LtCO2_160101_v201201_B7310A_161107211943s.csv')
gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}acos_LtCO2_160102_v201201_B7310A_161107211938s.csv')])
gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}acos_LtCO2_160103_v201201_B7310A_161107211931s.csv')])
# gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}acos_LtCO2_160104_v201201_B7310A_161107211923s.csv')])
# gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}oco2_LtCO2_160105_B10206Ar_200730103055s.csv')])
# gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}oco2_LtCO2_160106_B10206Ar_200730103123s.csv')])

complete_dataset = pd.concat([oco_jan_train,gosat_jan_train])
complete_dataset.drop(["Unnamed: 0",'dates','sounding_id',"pressure_weight","co2_profile_apriori"],axis= 1,inplace=True)

In [18]:
complete_dataset['pressure_levels'] = aggregatePressureLevels(complete_dataset)
complete_dataset.reset_index(inplace = True)
complete_dataset.head()

Unnamed: 0,index,latitude,longitude,time,solar_zenith_angle,sensor_zenith_angle,xco2_quality_flag,xco2,xco2_uncertainty,xco2_apriori,pressure_levels
0,0,-3.628539,-157.43341,1451606000.0,28.671078,22.46153,0,400.4499,0.39315,399.77917,504.984764
1,1,-3.639151,-157.43082,1451606000.0,28.665552,22.516706,0,400.2243,0.386413,399.7784,505.097463
2,2,-3.649849,-157.4283,1451606000.0,28.659916,22.573343,0,400.45795,0.384631,399.7777,505.305138
3,3,-3.660598,-157.42587,1451606000.0,28.654184,22.631292,0,399.8788,0.381401,399.78036,504.870992
4,4,-3.671286,-157.42354,1451606000.0,28.64842,22.689938,0,400.19666,0.379429,399.7903,504.923505


In [25]:
np.array(complete_dataset.loc[1,"xco2"])

array(400.2243)

In [26]:
TRAIN_SPLIT = 0.9
NUM_WORKERS = 16
PATH = "data_2016_oco2/csv_folder/oco2_4thjan2016.csv"




class OCO2Dataset(Dataset):
    def __init__(self, path, split = "train"):
        oco_path = './data_2016_oco2/csv_folder/'
        oco_jan_train = pd.read_csv(f'{oco_path}oco2_LtCO2_160101_B10206Ar_200730102710s.csv')
        oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160102_B10206Ar_200730102859s.csv')])
        oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160103_B10206Ar_200730102932s.csv')])
        # oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160104_B10206Ar_200730102937s.csv')])
        # oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160105_B10206Ar_200730103055s.csv')])
        # oco_jan_train = pd.concat([oco_jan_train,pd.read_csv(f'{oco_path}oco2_LtCO2_160106_B10206Ar_200730103123s.csv')])

        # FOr GOSAT
        gosat_path = './gosat/2016/csv_folder/'
        gosat_jan_train = pd.read_csv(f'{gosat_path}acos_LtCO2_160101_v201201_B7310A_161107211943s.csv')
        gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}acos_LtCO2_160102_v201201_B7310A_161107211938s.csv')])
        gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}acos_LtCO2_160103_v201201_B7310A_161107211931s.csv')])
        # gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}acos_LtCO2_160104_v201201_B7310A_161107211923s.csv')])
        # gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}oco2_LtCO2_160105_B10206Ar_200730103055s.csv')])
        # gosat_jan_train = pd.concat([gosat_jan_train,pd.read_csv(f'{gosat_path}oco2_LtCO2_160106_B10206Ar_200730103123s.csv')])

        complete_dataset = pd.concat([oco_jan_train,gosat_jan_train])
        complete_dataset.reset_index(inplace = True)
        complete_dataset['pressure_levels'] = aggregatePressureLevels(complete_dataset)
        

        oco_jan_test = pd.read_csv(f'{oco_path}oco2_LtCO2_160104_B10206Ar_200730102937s.csv')
        gosat_jan_test = pd.read_csv(f'{gosat_path}acos_LtCO2_160104_v201201_B7310A_161107211923s.csv')

        complete_test_dataset =  pd.concat([oco_jan_test,gosat_jan_test])
        complete_test_dataset.reset_index(inplace = True)
        complete_test_dataset['pressure_levels'] = aggregatePressureLevels(complete_test_dataset)

        if split == "train":
            self.df = complete_dataset
        else:
            self.df = complete_test_dataset
        self.df.drop(["Unnamed: 0",'dates','sounding_id',"pressure_weight","co2_profile_apriori"],axis= 1,inplace=True)
        self.df.dropna(inplace= True)
        self.length = len(self.df)

    def __getitem__(self,index):
        x = np.array(self.df.drop(["xco2","index"],axis = 1).iloc[index],dtype=np.float32)
        x = torch.from_numpy(x)
        y = np.array(self.df.loc[index,"xco2"],dtype=np.float32)
        y = torch.from_numpy(y)
        return x, y
    def __len__(self):    
        return self.length

In [27]:
train = OCO2Dataset(path=PATH)
test = OCO2Dataset(path=PATH,split = "test")
train_dl = DataLoader(train,shuffle=True,batch_size=BATCH_SIZE,num_workers=NUM_WORKERS,pin_memory=True)
test_dl = DataLoader(test,shuffle=True,batch_size=BATCH_SIZE,num_workers=NUM_WORKERS,pin_memory=True)
print(f"TRAIN_BATCHES: {len(train_dl)}\nTEST_BATCHES: {len(test_dl)}")

TRAIN_BATCHES: 35193
TEST_BATCHES: 10824


In [28]:
class Model(torch.nn.Module):
 
    def __init__(self):
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(in_features = 9, out_features = 1) 
        #self.normalizer = torch.nn.functional.normalize(input)
        self.relu = nn.ReLU()
    def forward(self, x):
        #x = self.normalizer(x)
        y_pred = self.relu(self.linear(x))
        return y_pred

In [29]:
model = Model()

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)

In [30]:
EPOCH = 10
losses = []
for epoch in range(EPOCH):
    for step, (x, y) in enumerate(train_dl):
        pred_y = model(x)
        loss = criterion(pred_y, y)
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        print('epoch {}, loss {}'.format(epoch, loss.item()))

  return F.mse_loss(input, target, reduction=self.reduction)


epoch 0, loss 3.0289724279095296e+16
epoch 0, loss 2.544553159766835e+16
epoch 0, loss 2.104044563516621e+16
epoch 0, loss 1.707259700707328e+16
epoch 0, loss 1.3550250304208896e+16
epoch 0, loss 1.0470494612488192e+16
epoch 0, loss 7828815384412160.0
epoch 0, loss 5615326679007232.0
epoch 0, loss 3814688483704832.0
epoch 0, loss 2405175511744512.0
epoch 0, loss 1358316424921088.0
epoch 0, loss 639718802849792.0
epoch 0, loss 208666120683520.0
epoch 0, loss 19379964084224.0
epoch 0, loss 159670.296875
epoch 0, loss 158946.515625
epoch 0, loss 159065.578125
epoch 0, loss 159032.09375
epoch 0, loss 159203.453125
epoch 0, loss 159832.390625
epoch 0, loss 157739.125
epoch 0, loss 160349.921875
epoch 0, loss 158825.875
epoch 0, loss 159400.34375
epoch 0, loss 159508.578125
epoch 0, loss 159097.53125
epoch 0, loss 159550.640625
epoch 0, loss 159420.6875
epoch 0, loss 160281.296875
epoch 0, loss 159105.078125
epoch 0, loss 159600.4375
epoch 0, loss 159458.484375
epoch 0, loss 159149.421875
ep

KeyboardInterrupt: 

In [None]:
plt.plot(losses)

In [None]:
losses = []
for batch in train_dl:
    x, y =   batch
    pred_y = model(x)
    loss = criterion(pred_y, y)
    losses.append(loss.item())
plt.plot(losses)