In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.image as mpimg
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as utils
from torchvision import transforms
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper); the dataset folders
# used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
real_folder = '/content/drive/MyDrive/Data/real'
fake_folder = '/content/drive/MyDrive/Data/fake'

# Build ONE label table covering both folders, indexed by file name.
# The frame used to be re-created before the FAKE pass, which discarded every
# REAL row, and each folder was listed 20 times for identical assignments.
# NOTE: the file name is the index, so a name that exists in both folders
# keeps only its FAKE label.
label_by_file = {}
for folder, label in ((real_folder, 'REAL'), (fake_folder, 'FAKE')):
  for file in os.listdir(folder):
    label_by_file[file] = label

df = pd.DataFrame({'label': list(label_by_file.values())},
                  index=list(label_by_file.keys()))

# Hold out 20% of the files for validation. The fixed seed keeps the split
# identical across kernel restarts.
from sklearn.model_selection import train_test_split
df_train, df_val = train_test_split(df, test_size=0.2, random_state=42)


In [None]:
# Peek at both splits: first rows of the training frame, then last rows of the
# validation frame.
for preview in (df_train.head(), df_val.tail()):
  print(preview)

                 label
easy_3_1100.jpg   FAKE
easy_4_0011.jpg   FAKE
easy_6_1110.jpg   FAKE
easy_5_1100.jpg   FAKE
easy_10_0001.jpg  FAKE
                 label
easy_7_1100.jpg   FAKE
easy_8_0010.jpg   FAKE
easy_11_1111.jpg  FAKE
easy_15_0011.jpg  FAKE


In [None]:
from torchvision import transforms

# Preprocessing applied to every image: convert to a tensor, then normalise
# each of the three channels with the fixed mean/std values below.
# NOTE: after this cell `transforms` names the composed pipeline, not the
# torchvision module.
_pipeline_steps = [
    transforms.ToTensor(),
    transforms.Normalize(
        std=[0.229,0.224,0.225],
        mean=[0.485,0.456,0.406],
    ),
]
transforms = transforms.Compose(_pipeline_steps)

In [None]:
from sklearn.metrics import accuracy_score
from torch.utils.data import Dataset, DataLoader

def get_lr(optimizer):
  """Return the 'lr' entry of the optimizer's first parameter group.

  Returns None when the optimizer has no parameter groups.
  """
  return next((group['lr'] for group in optimizer.param_groups), None)

In [None]:
# loading data
class DataTrain(Dataset):
    """Image dataset that lazily loads REAL/FAKE images from two folders.

    Args:
        df: frame whose index holds image file names and whose ``label``
            column holds ``'REAL'`` or ``'FAKE'``.
        real_folder: directory containing the files labelled ``'REAL'``.
        fake_folder: directory containing the files labelled ``'FAKE'``.
        transform: callable applied to each resized 224x224 RGB image array.
    """

    # Numeric training target for each label string.
    LABEL_TO_TARGET = {'REAL': 0, 'FAKE': 1}

    def __init__(self, df, real_folder, fake_folder, transform):
        self.transform = transform
        self.data = list(df.index.values)
        self.label = list(df['label'])
        self.realfolder = real_folder
        self.fakefolder = fake_folder

    def create_data_in_batches(self, real_folder, fake_folder, file, label):
        """Load, resize and transform one image.

        Returns:
            ``(image, target)`` where ``target`` is 0 for ``'REAL'`` and 1 for
            ``'FAKE'``, or ``(None, None)`` when OpenCV cannot read the file.

        Raises:
            ValueError: if ``label`` is neither ``'REAL'`` nor ``'FAKE'``.
        """
        if label not in self.LABEL_TO_TARGET:
            raise ValueError("unknown label {!r} for file {!r}".format(label, file))
        target = self.LABEL_TO_TARGET[label]
        folder = real_folder if label == 'REAL' else fake_folder

        image = cv2.imread(os.path.join(folder, file))
        if image is None:
            # Unreadable / missing file: signal it so a collate function can drop it.
            return None, None

        # cv2.imread yields BGR channel order; convert to RGB so the per-channel
        # normalisation statistics are applied to the channels they were
        # (presumably) computed for -- the values used look like the usual RGB ones.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_AREA)
        image = self.transform(image)
        # Return the real target; previously every sample was returned with target 1.
        return image, target

    def __getitem__(self, index):
        """Return ``(float32 image tensor, target)`` or ``(None, None)`` for an unreadable file."""
        image, target = self.create_data_in_batches(
            self.realfolder, self.fakefolder, self.data[index], self.label[index])

        if image is None:
            return None, None
        return torch.as_tensor(image, dtype=torch.float32), target

    def __len__(self):
        """Number of files listed in the frame (readable or not)."""
        return len(self.data)

# One dataset per split; both read from the same two folders and share the
# same preprocessing pipeline.
data_to_train = DataTrain(
    df=df_train,
    real_folder=real_folder,
    fake_folder=fake_folder,
    transform=transforms,
)
data_to_evaluate = DataTrain(
    df=df_val,
    real_folder=real_folder,
    fake_folder=fake_folder,
    transform=transforms,
)


In [None]:
# Number of samples per batch drawn by train_loader.
batch_size = 128

def collate_fn(batch):
   """Collate a batch after dropping every sample whose first element is None.

   The remaining samples are handed to PyTorch's default collate function.
   """
   kept = [sample for sample in batch if sample[0] is not None]
   return torch.utils.data.dataloader.default_collate(kept)

# Training batches are shuffled and pass through collate_fn, which drops
# samples whose image is None.
train_loader = DataLoader(
    dataset=data_to_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    collate_fn=collate_fn,
)
# Validation yields one sample at a time with the default collation.
# NOTE(review): no collate_fn here, so a (None, None) sample from the dataset
# is not filtered out for this loader -- confirm every validation file is readable.
val_loader = DataLoader(
    dataset=data_to_evaluate,
    batch_size=1,
    num_workers=1,
    pin_memory=True,
)



In [None]:
import torch
import torchvision.models as models
import torch.nn as nn

In [None]:
# Use the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
   device = 'cuda'
else:
   device = 'cpu'

# ResNet-18 built with torchvision's default constructor arguments, placed on
# the chosen device.
resnet18 = models.resnet18().to(device)
model = resnet18

# Feature extractor: every child module of the network except the last two.
features = nn.Sequential(*list(model.children())[:-2])

# Mark every feature-extractor parameter as trainable.
for params in features.parameters():
   params.requires_grad = True

In [None]:
class conv_net(nn.Module):
    """Binary classifier: backbone -> global average pool -> linear -> sigmoid.

    Args:
        backend: optional feature extractor producing 512-channel feature
            maps. Defaults to the notebook-level ``features`` module, which is
            what ``conv_net()`` has always used.
    """

    def __init__(self, backend=None):
        super(conv_net, self).__init__()
        self.backend = features if backend is None else backend
        # Average over the WHOLE feature map. A fixed 4x4 kernel only covered
        # one window of the map and produced extra rows for larger inputs.
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(512, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one probability per sample, shape ``(N, 1)``."""
        output = self.backend(x)
        output = self.avgpool(output)
        # (N, 512, 1, 1) -> (N, 512), keeping the batch dimension explicit.
        # The debug print of this tensor was removed: it flooded the cell output.
        output = output.flatten(1)
        output = self.fc(output)
        return self.sigmoid(output)  # binary classification



In [None]:
def _run_one_pass(model, loader, criterion, run_device, optimizer=None, epoch=0):
    """Run one full pass over ``loader``; update weights only when ``optimizer`` is given.

    Returns ``(mean loss per sample, accuracy)``, or ``(nan, nan)`` for an empty loader.
    """
    is_training = optimizer is not None
    loss_sum = 0.0
    correct_sum = 0
    sample_count = 0

    with torch.set_grad_enabled(is_training):
        for batch_number, (inputs, labels) in enumerate(loader, start=1):
            inputs = inputs.to(run_device)
            # Labels arrive as a 1-D batch; add a trailing dimension so they
            # line up with the model's (N, 1) output.
            labels = labels.to(run_device).float().unsqueeze(1)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Use the real batch size: the last batch (or a filtered batch)
            # can be smaller than the loader's nominal batch size.
            n_samples = inputs.size(0)
            n_correct = (outputs.detach().round() == labels.round()).sum().item()
            # Weight by batch size, assuming the criterion returns a batch mean.
            loss_sum += loss.item() * n_samples
            correct_sum += n_correct
            sample_count += n_samples

            if is_training and batch_number % 500 == 0:
                print("epoch : {:03d}, Batch number:{:03d}, loss:{:.4f}".format(epoch, batch_number, loss.item()),
                      'batch accuracy: ', n_correct / n_samples)

    if sample_count == 0:
        return float('nan'), float('nan')
    return loss_sum / sample_count, correct_sum / sample_count


def train_epochs(model, train_loader, val_loader, criterion, optimizer, epoch, epochs, scheduler=None):
    """Train ``model`` for epochs ``epoch``..``epochs`` (inclusive), validating after each one.

    Args:
        model: network returning one probability per sample, shape ``(N, 1)``.
        train_loader: iterable of ``(inputs, labels)`` batches used for training.
        val_loader: iterable of ``(inputs, labels)`` batches used for validation.
        criterion: loss called as ``criterion(outputs, float_labels)``.
        optimizer: optimizer stepped once per training batch.
        epoch: number of the first epoch to run.
        epochs: number of the last epoch to run.
        scheduler: optional LR scheduler stepped once per epoch. When omitted,
            a notebook-level ``scheduler`` variable is used if one exists.

    Returns:
        ``(train_accs, train_losses, model, val_accs, val_losses, optimizer, scheduler)``
        where each history is a list holding one float per completed epoch.
    """
    if scheduler is None:
        # Backward compatibility: the scheduler used to be read from a global.
        scheduler = globals().get('scheduler')

    # Follow the model's own device instead of depending on a global `device`.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    train_accs, train_losses, val_accs, val_losses = [], [], [], []

    while epoch <= epochs:
        model.train()
        train_loss, train_acc = _run_one_pass(
            model, train_loader, criterion, run_device, optimizer=optimizer, epoch=epoch)
        train_losses.append(train_loss)
        train_accs.append(train_acc)

        model.eval()
        val_loss, val_acc = _run_one_pass(model, val_loader, criterion, run_device)
        val_losses.append(val_loss)
        val_accs.append(val_acc)

        if scheduler is not None:
            scheduler.step()
        # Advance the counter inside the loop; without this the loop never ends.
        epoch += 1

    return train_accs, train_losses, model, val_accs, val_losses, optimizer, scheduler




In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Place the model on the device before the optimizer is built, so the
# optimizer is created against the parameters in their final location.
classifier = conv_net().to(device)

# Binary cross-entropy on the sigmoid probabilities produced by conv_net.
# `.to()` was previously called without a target, which moved nothing.
criterion = nn.BCELoss().to(device)

optimizer = optim.Adam(classifier.parameters(), lr=0.001)

# Halve the learning rate every 1000 scheduler steps.
from torch.optim import lr_scheduler
scheduler = lr_scheduler.StepLR(optimizer, step_size=1000,gamma=0.5)

In [None]:
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Make sure the model lives on the selected device (a no-op when already there).
classifier.to(device)

# Start from empty history lists.
train_accs =[]
train_losses = []
val_accs =[]
val_losses =[]
epochs = 10
start_epoch = 1

# train_epochs returns, in this order:
#   train_accs, train_losses, model, val_accs, val_losses, optimizer, scheduler
# Unpack in that same order (the model used to be bound to `val_accs`, etc.).
(train_accs, train_losses, trained_model, val_accs, val_losses,
 trained_optimizer, trained_scheduler) = train_epochs(
    classifier, train_loader, val_loader, criterion, optimizer, start_epoch, epochs)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m

         ...,

         [[2.5671]],

         [[0.3056]],

         [[1.7566]]],


        [[[2.1467]],

         [[1.8535]],

         [[1.4958]],

         ...,

         [[2.2052]],

         [[0.0351]],

         [[1.8661]]],


        [[[2.1333]],

         [[1.2102]],

         [[1.6194]],

         ...,

         [[2.1128]],

         [[0.0458]],

         [[1.7947]]],


        ...,


        [[[1.9010]],

         [[0.9598]],

         [[1.9225]],

         ...,

         [[2.1778]],

         [[0.2837]],

         [[1.9845]]],


        [[[2.9340]],

         [[1.1320]],

         [[1.7672]],

         ...,

         [[2.5624]],

         [[0.5647]],

         [[1.5496]]],


        [[[2.3000]],

         [[1.4743]],

         [[1.5544]],

         ...,

         [[2.4650]],

         [[0.0224]],

         [[1.6126]]]], device='cuda:0', grad_fn=<AvgPool2DBackward0>)
tensor([[[[1.9013]],

         [[0.9600]],

 

In [None]:
# Bundle everything needed to resume or inspect this run.
checkpoint = {
    'epoch': epochs + 1,
    # Save the trained classifier (backbone + fc head). The bare `model`
    # ResNet does not contain the head that was trained.
    'state_dict': classifier.state_dict(),
    'optimizer': optimizer.state_dict(),
    'scheduler': scheduler.state_dict(),
    'train losses': train_losses,
    'train_accs': train_accs,
    'val accs': val_accs,
    'val losses': val_losses
 }

#save checkpoint
# The original path keeps holding a bare state_dict (same file format as before);
# the full checkpoint, which was previously built but never written, goes next to it.
weights_path = '/content/drive/MyDrive/Data/resnetinceptionv1_epoch_32.pth'
torch.save(checkpoint['state_dict'], weights_path)
torch.save(checkpoint, weights_path.replace('.pth', '_checkpoint.pth'))