In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression                                       #linear regression
from sklearn.preprocessing import PolynomialFeatures                                    #polynomial regression
from sklearn.svm import SVR                                                             #simple vector regression
from sklearn.tree import DecisionTreeRegressor                                          #decision tree regressor
from sklearn.ensemble import RandomForestRegressor                                      #random forest regressor
import gurobipy as gp
import torch
from torch import nn

In [2]:
# Read the per-instance results table into a DataFrame. The preview in the
# next cell shows the saved index coming back as an extra "Unnamed: 0" column.
dataset = pd.read_csv('input/adatb_setcover_output.csv')

In [3]:
# Preview the first ten rows to check which columns were read.
dataset.head(10)

Unnamed: 0,policy,seed,type,instance,nnodes,nlps,stime,gap,status,ndomchgs,ncutoffs,walltime,proctime,objval
0,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,2,0.03,0.0,optimal,0,0,0.041468,0.04147,28.0
1,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,8,0.12,0.0,optimal,0,0,0.117496,0.117499,25.0
2,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,3,0.03,0.0,optimal,0,0,0.03275,0.032753,10.0
3,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,2,0.05,0.0,optimal,0,0,0.046181,0.046185,23.0
4,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,1,0.01,0.0,optimal,0,0,0.014738,0.014741,11.0
5,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,26,0.24,0.0,optimal,0,0,0.250231,0.250223,17.0
6,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,30,0.33,0.0,optimal,0,1,0.331593,0.331595,19.0
7,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,37,0.35,0.0,optimal,0,1,0.367199,0.367201,30.0
8,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,2,0.02,0.0,optimal,0,0,0.017138,0.017119,10.0
9,internal:relpscost,0,small,/data/IP-DL/instances/becsogergely/adatb/setco...,1,1,0.02,0.0,optimal,0,0,0.01578,0.015783,32.0


In [4]:
%%capture
for i in range(5000):
    m = gp.read("./train/instance_"+str(i+1)+".lp")                          # reading the LP files
    A = m.getA()                                                             # creating sparse matrix from LP files
    B=A.toarray()                                                            # converting the sparse matrix to ndarray
    dataset.at[i,"instance"]=B                                               # writing this matrix to the dataframe
    

In [5]:
# Remove the columns that seem irrelevant, in a single call.

dataset = dataset.drop(columns=['policy', 'seed', 'type', 'status'])

In [None]:
%%capture
m= gp.read("./train/instance_2.lp")                          # reading the LP files
l=[]
for i in range(5000):
    m = gp.read("./train/instance_"+str(i+1)+".lp")
    c = m.getAttr("Obj",m.getVars())
    l=l+[c]


In [None]:
# Store the collected objective coefficient lists in a new "objective" column.
dataset["objective"]=l

In [None]:
# Preview the frame again now that the "objective" column exists.
dataset.head()

In [None]:
# Sanity check on one row: the objective vector must stack underneath the
# constraint matrix as a single extra row.
ins = dataset['instance'][0]
obj = dataset['objective'][0]      # same row as `ins` (this used to read row 1)
b = np.array(obj)
c = b.reshape(1, 200)              # row vector so it can be concatenated along axis 0
print(b.shape)
print(c.shape)
print(ins.shape)

x = np.concatenate((ins, c), axis=0)
print(x.shape)

In [None]:
# Append each row's objective vector to its instance matrix as a final row.
# NOTE(review): running this cell twice appends the objective twice.

for i in range(5000):
    ins = dataset.at[i, 'instance']
    obj = dataset.at[i, 'objective']
    b = np.array(obj)
    c = b.reshape(1, 200)                    # objective as a 1 x 200 row
    x = np.concatenate((ins, c), axis=0)     # matrix rows first, objective last
    dataset.at[i, "instance"] = x
    

In [None]:
#dataset.to_csv("adatb_matrixos_adathalmaz.csv")

## Most kezdődik az AI — the neural-network part of the notebook starts here
 

In [None]:
# Custom dataset class
class CustomIPDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each ``instance`` entry with its ``nlps`` target.

    Args:
        dataFrame: frame with an ``instance`` column (the model input of each
            row) and an ``nlps`` column (the regression target of each row).
            Its index may hold any labels; rows are addressed by position.
    """

    def __init__(self, dataFrame):
        self.instances = dataFrame['instance']
        self.target_values = dataFrame['nlps']

    def __len__(self):
        """Return the number of rows."""
        return len(self.instances)

    def __getitem__(self, idx):
        """Return ``(instance, target_value)`` for the row at position ``idx``.

        Raises:
            IndexError: if ``idx`` is outside the valid positions.
        """
        # Positional lookup: a DataLoader asks for positions 0..len-1, which
        # match the index labels only on a frame whose index was reset.
        instance = self.instances.iloc[idx]
        target_value = self.target_values.iloc[idx]
        return instance, target_value

In [None]:
# Build the custom datasets.

# Naive split for a first attempt: the first 4500 rows are used for training
# and the last 500 rows for testing.
train_df = dataset.head(4500)
test_df = dataset.tail(500).reset_index(drop=True)   # renumber the test rows from 0

training_data = CustomIPDataset(train_df)
test_data = CustomIPDataset(test_df)

In [None]:
# Dataloaders: batches of 50 samples, reshuffled on every pass, for both splits.
train_dataloader = torch.utils.data.DataLoader(
    training_data,
    batch_size=50,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    test_data,
    batch_size=50,
    shuffle=True,
)


In [None]:
# Select the compute device.
# `gpu` is the CUDA device index to use, or -1 to run on the CPU.
gpu = 1

if gpu == -1:
    # Explicit CPU request: no GPU is required, so none is demanded.
    device = torch.device("cpu")
else:
    if not torch.cuda.is_available():
        raise Exception("GPU device not found, runtime environment should be set to GPU")
    # Fail here with a clear message instead of later, when the model is moved.
    if not 0 <= gpu < torch.cuda.device_count():
        raise Exception(
            f"GPU index {gpu} is out of range: {torch.cuda.device_count()} CUDA device(s) visible"
        )
    cuda = "cuda:" + str(gpu)
    device = torch.device(cuda)
    # Report the device that was actually selected, not the process default.
    print(f"Using GPU device: {torch.cuda.get_device_name(device)}")

In [None]:
#creating a simple-basic neural network modell, trying to reduce the parameterspace in the first layer. \n",
from torch import nn

class DenseNetwork(nn.Module):
    """Fully connected regressor: flatten, three hidden ReLU/Dropout layers, one output.

    Args:
        in_features: number of values per sample after flattening. The default
            ``101*200`` is the size the first layer was previously fixed to.
        hidden: width of each of the three hidden layers.
        dropout: dropout probability applied after every hidden activation.
    """

    def __init__(self, in_features=101*200, hidden=128, dropout=0.3):
        super().__init__()
        # Keep the batch dimension, collapse everything else into one axis.
        self.flatten = nn.Flatten(start_dim=1, end_dim=-1)
        # Layer order is unchanged, so existing state_dict keys still match.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(hidden, 1),
            )

    def forward(self, x):
        """Return one value per sample, shape ``(batch, 1)``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
# Instantiate the network, then move it to the selected device.
model = DenseNetwork()
model = model.to(device)
print(model)

In [None]:
# Initialize the loss function: mean squared error between prediction and target.
# Alternative kept commented out:
#loss_fn = nn.CrossEntropyLoss()
loss_fn = nn.MSELoss()

In [None]:
# Adam over all model parameters, with learning rate 1e-3 and weight decay 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-3)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: iterable of ``(x, y)`` batches; batches may differ in size.
        model: network to optimise. Batches are moved to the device of its
            parameters (CPU if it has none).
        loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: optimizer that updates ``model``'s parameters.

    Returns:
        float: loss of the last batch, or ``nan`` if the dataloader is empty.
    """
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()   # make sure dropout is active while training
    last_loss = float("nan")
    for batch, (x, y) in enumerate(dataloader):
        # Compute prediction and loss
        X = torch.as_tensor(x, dtype=torch.float32, device=target_device)
        Y = torch.as_tensor(y, dtype=torch.float32, device=target_device)
        pred = model(X)
        # Column vector matching `pred`; -1 keeps this valid for any batch size.
        Y = Y.view(-1, 1)
        loss = loss_fn(pred, Y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Always a plain float, so the return type no longer depends on
        # which batch happened to be the last one.
        last_loss = loss.item()
        if batch % 100 == 0:
            print(f"Train loss: {last_loss:>11f}")
    return last_loss


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for x, y in dataloader:
            X = torch.Tensor(x).type(torch.FloatTensor).to(device)
            Y = torch.Tensor(y).type(torch.FloatTensor).to(device)
            Y = Y.view(50,1)
            pred = model(X)
            test_loss += loss_fn(pred, Y).item()

    test_loss /= num_batches
    correct /= size
    print(f"Avg test loss: {test_loss:>8f}" )
    return test_loss

In [None]:
# Run the training: one train pass and one test pass per epoch, recording
# both losses as plain floats for the plot further down.
epochs = 50
train_losses, test_losses = [], []
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loss = train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loss = test_loop(test_dataloader, model, loss_fn)

    train_losses += [float(train_loss)]
    test_losses += [float(test_loss)]
    print("Done!")

In [None]:
#print(test_losses)

In [None]:
def plot_errors(train_errors, test_errors):
    """Draw the train and test error curves on the current axes and show the figure.

    Args:
        train_errors: sequence of training errors, one value per epoch.
        test_errors: sequence of test errors, one value per epoch.
    """
    ax = plt.gca()
    for values, name in ((train_errors, 'Train Error'), (test_errors, 'Test Error')):
        ax.plot(values, label=name)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Error')
    ax.legend()
    plt.show()

# Plot the losses recorded during the training run.
train_errors, test_errors = train_losses, test_losses
plot_errors(train_errors, test_errors)