<a href="https://colab.research.google.com/github/avadhutc/P2S10/blob/master/Self_Driving_Car_TD3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math



# Device every network and training batch is moved to: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Flatten(torch.nn.Module):
    """Module that collapses every dimension after the first into one."""

    def forward(self, x):
        """Return ``x`` viewed as a 2-D tensor whose first dimension is unchanged."""
        leading = x.shape[0]
        return x.view(leading, -1)

class Actor(nn.Module):
    """Policy network: a convolutional encoder followed by a two-layer head.

    The head ends in ``Tanh`` and the result is multiplied by ``max_action``.

    Args:
        state_dim: Accepted for interface compatibility; not used by this class.
        action_dim: Number of outputs of the final linear layer.
        max_action: Factor applied to the tanh output.
        latent_dim: Input width of the first linear layer; it must equal the
            width of the flattened encoder output.
    """

    def __init__(self, state_dim, action_dim, max_action, latent_dim):
        super().__init__()

        # Channel plan per stage: input, after first 3x3 conv, after second
        # 3x3 conv, after the 1x1 conv that follows the max-pool.
        stage_channels = ((1, 16, 32, 10), (10, 32, 64, 32), (32, 64, 128, 64))

        encoder_layers = []
        for c_in, c_mid, c_out, c_squeeze in stage_channels:
            encoder_layers += [
                nn.Conv2d(c_in, c_mid, 3),
                nn.ReLU(),
                nn.BatchNorm2d(c_mid),
                nn.Conv2d(c_mid, c_out, 3),
                nn.ReLU(),
                nn.BatchNorm2d(c_out),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Conv2d(c_out, c_squeeze, 1),
            ]
        # 64 channels remain after the last stage; assumes the 6x6 average
        # pool reduces the map to 1x1 so Flatten yields 64 values - TODO confirm
        # for the input size in use.
        encoder_layers += [nn.AvgPool2d(6), Flatten()]
        self.encoder = nn.ModuleList(encoder_layers)

        self.linear = nn.ModuleList([
            nn.Linear(latent_dim, 30),
            nn.ReLU(),
            nn.Linear(30, action_dim),
            nn.Tanh(),
        ])

        self.max_action = max_action

    def forward(self, x):
        """Run ``x`` through the encoder and the head, then scale by ``max_action``."""
        for layer in [*self.encoder, *self.linear]:
            x = layer(x)
        return self.max_action * x
		
class Critic(nn.Module):
    """Pair of independent Q-networks with identical architecture.

    Each network encodes the input with its own convolutional encoder,
    concatenates the action to the encoded features and maps the result to a
    single value with a two-layer head.

    Args:
        state_dim: Accepted for interface compatibility; not used by this class.
        action_dim: Width of the action that is concatenated to the features.
        latent_dim: Width of the flattened encoder output.
    """

    def __init__(self, state_dim, action_dim, latent_dim):
        super().__init__()
        head_width = latent_dim + action_dim

        # Sub-modules are created in the order encoder_1, linear_1, encoder_2,
        # linear_2; that order defines the order of ``parameters()``.
        self.encoder_1 = self._build_encoder()
        self.linear_1 = self._build_head(head_width)
        self.encoder_2 = self._build_encoder()
        self.linear_2 = self._build_head(head_width)

    @staticmethod
    def _build_encoder():
        """Create one convolutional encoder as a ``ModuleList``."""
        # Channel plan per stage: input, after first 3x3 conv, after second
        # 3x3 conv, after the 1x1 conv that follows the max-pool.
        stage_channels = ((1, 16, 32, 10), (10, 32, 64, 32), (32, 64, 128, 64))
        layers = []
        for c_in, c_mid, c_out, c_squeeze in stage_channels:
            layers += [
                nn.Conv2d(c_in, c_mid, 3),
                nn.ReLU(),
                nn.BatchNorm2d(c_mid),
                nn.Conv2d(c_mid, c_out, 3),
                nn.ReLU(),
                nn.BatchNorm2d(c_out),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Conv2d(c_out, c_squeeze, 1),
            ]
        layers += [nn.AvgPool2d(6), Flatten()]
        return nn.ModuleList(layers)

    @staticmethod
    def _build_head(in_features):
        """Create the two-layer head that maps features + action to one value."""
        return nn.ModuleList([
            nn.Linear(in_features, 30),
            nn.ReLU(),
            nn.Linear(30, 1),
        ])

    @staticmethod
    def _evaluate(encoder, head, x, u):
        """Encode ``x``, append ``u`` along dimension 1 and apply ``head``."""
        for layer in encoder:
            x = layer(x)
        x = torch.cat([x, u], 1)
        for layer in head:
            x = layer(x)
        return x

    def forward(self, x, u):
        """Return the outputs of both Q-networks for input ``x`` and action ``u``."""
        q1 = self._evaluate(self.encoder_1, self.linear_1, x, u)
        q2 = self._evaluate(self.encoder_2, self.linear_2, x, u)
        return q1, q2

    def Q1(self, x, u):
        """Return the output of the first Q-network only."""
        return self._evaluate(self.encoder_1, self.linear_1, x, u)
        
class TD3(object):
    """TD3 agent: one actor, a twin critic, and a target copy of each.

    Args:
        state_dim: Forwarded to ``Actor`` and ``Critic``.
        action_dim: Number of action components.
        max_action: Bound used to scale the actor output and to clamp the
            noisy target action.
        latent_dim: Forwarded to ``Actor`` and ``Critic``.
    """

    def __init__(self, state_dim, action_dim, max_action, latent_dim):
        self.actor = Actor(state_dim, action_dim, max_action, latent_dim).to(device)
        self.actor_target = Actor(state_dim, action_dim, max_action, latent_dim).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters())
        self.critic = Critic(state_dim, action_dim, latent_dim).to(device)
        self.critic_target = Critic(state_dim, action_dim, latent_dim).to(device)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters())
        self.max_action = max_action

    def select_action(self, state):
        """Return the actor's action for a single state as a flat NumPy array.

        The state is passed to the actor unscaled, exactly as ``train`` passes
        replayed states. (It used to be multiplied by 100 here only, so the
        policy that was executed differed from the policy being optimised.)
        """
        state = torch.Tensor(state).unsqueeze(0).to(device)  # add the batch dimension
        # No gradient is needed to pick an action.
        # NOTE(review): the actor is left in training mode here, as before -
        # confirm whether eval() is wanted once the actor contains BatchNorm.
        with torch.no_grad():
            action = self.actor(state)
        return action.cpu().numpy().flatten()

    def train(self, replay_buffer, iterations, batch_size=100, discount=0.99, tau=0.005, policy_noise=0.2, noise_clip=0.5, policy_freq=2):
        """Run ``iterations`` TD3 updates on batches drawn from ``replay_buffer``.

        Args:
            replay_buffer: Object whose ``sample(batch_size)`` returns
                ``(states, next_states, actions, rewards, dones)``.
            iterations: Number of critic updates to perform.
            batch_size: Transitions per update.
            discount: Discount factor applied to the target Q-value.
            tau: Polyak averaging rate for the target networks.
            policy_noise: Standard deviation of the target-policy noise.
            noise_clip: Absolute bound applied to that noise.
            policy_freq: The actor and both targets are updated on every
                ``policy_freq``-th iteration.
        """
        for it in range(iterations):

            # Sample a batch of transitions (s, s', a, r, done) from the memory
            batch_states, batch_next_states, batch_actions, batch_rewards, batch_dones = replay_buffer.sample(batch_size)
            state = torch.Tensor(batch_states).to(device)
            next_state = torch.Tensor(batch_next_states).to(device)
            action = torch.Tensor(batch_actions).to(device)
            reward = torch.Tensor(batch_rewards).to(device)
            done = torch.Tensor(batch_dones).to(device)

            # The target is a constant for the critic loss, so it is built
            # without recording an autograd graph through the target networks.
            with torch.no_grad():
                # The actor target proposes a' for s'
                next_action = self.actor_target(next_state)

                # Clipped Gaussian noise is added to a', then a' is clamped to the action range
                noise = (torch.randn_like(action) * policy_noise).clamp(-noise_clip, noise_clip)
                next_action = (next_action + noise).clamp(-self.max_action, self.max_action)

                # Qt = r + gamma * (1 - done) * min(Qt1(s', a'), Qt2(s', a'))
                target_Q1, target_Q2 = self.critic_target(next_state, next_action)
                target_Q = torch.min(target_Q1, target_Q2)
                target_Q = reward + (1 - done) * discount * target_Q

            # Critic loss = MSE(Q1(s, a), Qt) + MSE(Q2(s, a), Qt)
            current_Q1, current_Q2 = self.critic(state, action)
            critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q)

            self.critic_optimizer.zero_grad()
            critic_loss.backward()
            self.critic_optimizer.step()

            # Delayed update: the actor ascends Q1 and both targets are moved
            # by polyak averaging only every ``policy_freq`` iterations.
            if it % policy_freq == 0:
                actor_loss = -self.critic.Q1(state, self.actor(state)).mean()
                self.actor_optimizer.zero_grad()
                actor_loss.backward()
                self.actor_optimizer.step()

                for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

                for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

    def save(self, filename, directory):
        """Write the actor and critic weights to ``directory`` as ``<filename>_actor.pth`` / ``<filename>_critic.pth``."""
        torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, filename))
        torch.save(self.critic.state_dict(), '%s/%s_critic.pth' % (directory, filename))

    def load(self, filename, directory):
        """Load the weights written by ``save`` and re-synchronise the targets.

        Tensors are mapped onto the device the networks currently live on, so
        a checkpoint saved on a GPU can be loaded on a CPU-only machine. The
        target networks are then reset to the loaded weights; otherwise they
        would keep whatever values they had before the call.
        """
        actor_device = next(self.actor.parameters()).device
        critic_device = next(self.critic.parameters()).device
        self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename), map_location=actor_device))
        self.critic.load_state_dict(torch.load('%s/%s_critic.pth' % (directory, filename), map_location=critic_device))
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.critic_target.load_state_dict(self.critic.state_dict())


In [0]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math


#define replay buffer
class ReplayBuffer(object):
    """Bounded store of transitions with uniform random sampling.

    Transitions are appended until ``max_size`` entries are held; after that
    each new transition overwrites the slot at ``ptr``, which then advances
    cyclically.

    Args:
        max_size: Maximum number of stored transitions. Converted to ``int``
            so that the ring index stays an integer.

    Raises:
        ValueError: If ``max_size`` is smaller than 1.
    """

    def __init__(self, max_size=1e6):
        max_size = int(max_size)
        if max_size < 1:
            raise ValueError("max_size must be at least 1")
        self.storage = []
        self.max_size = max_size
        self.ptr = 0

    def add(self, transition):
        """Store ``transition``, overwriting an existing entry once the buffer is full."""
        if len(self.storage) >= self.max_size:
            self.storage[self.ptr] = transition
            self.ptr = (self.ptr + 1) % self.max_size
        else:
            self.storage.append(transition)

    def sample(self, batch_size):
        """Draw ``batch_size`` transitions uniformly at random, with replacement.

        Each stored transition must unpack into
        ``(state, next_state, action, reward, done)``.

        Returns:
            Five NumPy arrays ``(states, next_states, actions, rewards, dones)``;
            ``rewards`` and ``dones`` are reshaped to ``(batch_size, 1)``.

        Raises:
            ValueError: If the buffer is empty.
        """
        if not self.storage:
            raise ValueError("cannot sample from an empty replay buffer")
        ind = np.random.randint(0, len(self.storage), size=batch_size)
        batch_states, batch_next_states, batch_actions, batch_rewards, batch_dones = [], [], [], [], []
        for i in ind:
            state, next_state, action, reward, done = self.storage[i]
            # np.asarray avoids a copy when it can and copies when it must;
            # np.array(..., copy=False) raises on NumPy 2 whenever a copy is
            # unavoidable (e.g. for plain Python numbers).
            batch_states.append(np.asarray(state))
            batch_next_states.append(np.asarray(next_state))
            batch_actions.append(np.asarray(action))
            batch_rewards.append(np.asarray(reward))
            batch_dones.append(np.asarray(done))
        return np.array(batch_states), np.array(batch_next_states), np.array(batch_actions), np.array(batch_rewards).reshape(-1, 1), np.array(batch_dones).reshape(-1, 1)

In [0]:
#:kivy 1.0.9
# ref: https://kivy.org/docs/tutorials/pong.html

# Rule for the Car widget: a 20x10 rectangle textured with car.png and
# rotated by the widget's `angle` around its own centre.
<Car>:
    size: 20, 10
    origin: 10, 5
    canvas:
        # PushMatrix/PopMatrix bracket the rotation and the rectangle
        PushMatrix
        Rotate:
            angle: self.angle
            origin: self.center
        Rectangle:
            pos: self.pos
            size: self.size
            source: "./images/car.png"
        PopMatrix



# Rule for the Game widget: draws citymap.png at 1429x660 and holds one Car.
<Game>:
    # Binds the `car` property of Game to the child declared with id game_car
    car: game_car
    
    canvas:
        Rectangle:
            pos: self.pos
            size: 1429, 660
            source: "./images/citymap.png"

    Car:
        id: game_car
        center: self.parent.center
    

In [0]:
# Self Driving Car
import numpy as np
from random import random, randint
import matplotlib.pyplot as plt
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from collections import deque
import time
import random
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import torch.nn.functional as F
import os
import math
#from torchvision import transforms

from td3_cnn import TD3
from td3_utilities import ReplayBuffer

# Importing the Kivy packages
from kivy.app import App
from kivy.uix.widget import Widget
from kivy.uix.button import Button
from kivy.graphics import Color, Ellipse, Line
from kivy.config import Config
from kivy.properties import NumericProperty, ReferenceListProperty, ObjectProperty
from kivy.vector import Vector
from kivy.clock import Clock
from kivy.core.image import Image as CoreImage
from PIL import Image as PILImage
from kivy.graphics.texture import Texture
from PIL import ImageDraw

# Kivy input/window configuration: mouse provider option 'multitouch_on_demand'
# (so a right click does not put a red point), fixed-size 1429x660 window.
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')


# Drawing state shared with MyPaintWidget: last_x / last_y hold the last point
# touched while sand is drawn on the map, n_points and length accumulate the
# number of points and the length of the current stroke.
last_x = 0
last_y = 0
n_points = 0
length = 0
# Passed to TD3 and used to clip the noisy action in Game.update
max_action = 1
save_models = True # Boolean checker whether or not to save the pre-trained model

# Base name of the checkpoint files written by brain.save / read by brain.load
env_name = "car_racing"
file_name = "%s_%s" % ("TD3", env_name)
print ("---------------------------------------")
print ("Settings: %s" % (file_name))
print ("---------------------------------------")

# Create the checkpoint directory up front when saving is enabled
if save_models and not os.path.exists("./pytorch_models"):
  os.makedirs("./pytorch_models")
  
directory = "pytorch_models"

#function to extract car image
def extract_car(x, y, width, height, angle):
    """Return the closed outline of a rotated, shifted rectangle.

    The rectangle has corners ``(0, 0)``, ``(width, 0)``, ``(width, height)``
    and ``(0, height)``. Each corner, taken as a row vector, is multiplied on
    the right by ``[[cos, -sin], [sin, cos]]`` for ``angle`` given in degrees,
    and ``(x, y)`` is added afterwards.

    Returns:
        Array of shape ``(5, 2)``; the first corner is repeated at the end.
    """
    outline = np.array([(0, 0), (width, 0), (width, height), (0, height), (0, 0)])
    theta = (np.pi / 180.0) * angle
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    rotation = np.array([[cos_t, -sin_t],
                         [sin_t, cos_t]])
    shift = np.array([x, y])
    return np.dot(outline, rotation) + shift

# Model dimensions handed to TD3; state_dim is also the side length of the
# placeholder state tensor created in Game.
state_dim = 80
action_dim = 1
latent_dim = 64

# The agent and its replay memory, shared by Game and CarApp
brain = TD3(state_dim,action_dim,max_action,latent_dim)
replay_buffer = ReplayBuffer()
last_reward = 0
scores = []

# Image whose pixels Game.update reads for its debug prints
im = CoreImage("./images/testsand.png")

# Initializing the map: init() runs on the first Game.update call and clears this flag
first_update = True
def init():
    """Load the sand mask, pad it, and reset the goal and the swap flag.

    Sets the globals ``sand``, ``goal_x``, ``goal_y``, ``swap`` and clears
    ``first_update``. The mask is read from ``./images/mask.png`` as a
    greyscale image, divided by 255 and padded with 80 cells of value 1 on
    every side.
    """
    global sand, goal_x, goal_y, first_update, swap

    # Window-sized placeholder; only its shape is printed before it is replaced
    sand = np.zeros((longueur,largeur))
    print("Before Padding:", sand.shape)

    mask = PILImage.open("./images/mask.png").convert('L')
    sand = np.pad(np.asarray(mask)/255, 80, 'constant', constant_values=1)
    print("After Padding:", sand.shape)

    goal_x, goal_y = 1420, 622
    swap = 0
    first_update = False


# Distance to the goal at the previous step; Game.update compares the new
# distance against it and then overwrites it.
last_distance = 0


# Creating the car class
class Car(Widget):
    """Car widget; ``move`` advances it and returns the image the agent observes."""

    angle = NumericProperty(0)
    rotation = NumericProperty(0)
    velocity_x = NumericProperty(0)
    velocity_y = NumericProperty(0)
    velocity = ReferenceListProperty(velocity_x, velocity_y)

    def move(self, rotation):
        """Advance the car by its velocity, turn it, and render its surroundings.

        Args:
            rotation: Amount added to ``angle`` for this step.

        Returns:
            Float tensor of shape ``(1, 80, 80)`` with values in ``[0, 1]``: a
            crop of the sand map, with the car drawn on it, resized to 80x80.
        """
        self.pos = Vector(*self.velocity) + self.pos
        self.rotation = float(rotation)
        self.angle = self.angle + self.rotation

        self.dummycar = Vector(0, 0).rotate(self.angle) + self.pos
        a = self.dummycar

        # Render the whole map as an 8-bit image (the uint8 cast turns every
        # value below 1 into 0) and paint the car outline on it in grey.
        img_tmp = PILImage.fromarray(sand.astype("uint8")*255)
        draw = ImageDraw.Draw(img_tmp)
        # NOTE(review): the outline is drawn with an offset of 80 while the
        # crop below uses no offset - confirm the two are meant to differ.
        extract_car_area = extract_car(x=int(a[1]+80), y=int(a[0]+80), width=10, height=20, angle = self.angle)
        draw.polygon([tuple(p) for p in extract_car_area], fill=128)

        sand1 = np.asarray(img_tmp)
        # 130x130 window (30 cells before, 100 after the car position),
        # scaled down to 80x80 afterwards
        cropped_img = sand1[int(a[0])-30:int(a[0])+100, int(a[1])-30:int(a[1])+100]
        camera_data = np.asarray(cropped_img)
        img_rescale = PILImage.fromarray(camera_data)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        img_rescale = img_rescale.resize((80, 80), PILImage.LANCZOS)
        camera_data = np.asarray(img_rescale)
        camera_data = np.expand_dims(camera_data, axis=0)  # add the channel axis
        camera_data = torch.from_numpy(camera_data).float().div(255) # normalise the image , FloatTensor type

        return camera_data


# Creating the game class

class Game(Widget):
    """Root widget that steps the environment and schedules TD3 training.

    ``update`` performs one environment step per call: it resets the episode
    when needed (training the agent first once enough steps were collected),
    lets the agent act, computes the reward, stores the transition in the
    replay buffer and writes periodic logs and checkpoints.
    """

    car = ObjectProperty(None)
    total_timesteps = 0  # environment steps taken so far
    episode_num = 0
    done = True  # True makes the next update() start a new episode
    t0 = time.time()
    max_timesteps = 500000  # update() stops stepping once this many steps were taken
    state = torch.ones([1,state_dim,state_dim])  # placeholder until the first reset
    episode_reward = 0
    episode_timesteps = 0
    sand_counter = 0  # consecutive steps spent on sand

    def serve_car(self):
        """Put the car at the centre of the widget with its initial velocity."""
        self.car.center = self.center
        self.car.velocity = Vector(6, 0)

    def update(self, dt):
        """Advance the simulation by one step.

        Args:
            dt: Value supplied by the scheduler that calls this method; unused.
        """
        global reward
        global last_distance
        global goal_x
        global goal_y
        global longueur
        global largeur
        global swap

        # Training hyper-parameters
        expl_noise = 0.5 # Exploration noise - STD value of exploration Gaussian noise
        # Training only starts after this many steps.
        # NOTE(review): actions always come from the policy (plus noise), also
        # before this threshold - confirm that no random warm-up is intended.
        start_timesteps = 20000
        batch_size = 50 # Size of the batch
        discount = 0.99 # Discount factor gamma, used in the calculation of the total discounted reward
        tau = 0.005 # Target network update rate
        policy_noise = 0.2 # STD of Gaussian noise added to the actions for the exploration purposes
        noise_clip = 0.5 # Maximum value of the Gaussian noise added to the actions (policy)
        policy_freq = 2 # Number of iterations to wait before the policy network (Actor model) is updated
        log_interval = 10  # a log line is written every log_interval steps

        longueur = self.width
        largeur = self.height

        if first_update:
            init()

        # Nothing left to do once the step budget is used up
        if self.total_timesteps >= self.max_timesteps:
            return

        if self.done:
            # Report the finished episode (skipped at the very first call)
            if self.total_timesteps != 0:
                print("Total Timesteps: {} Episode Num: {} Reward: {}".format(self.total_timesteps,self.episode_num, self.episode_reward))
            # Train for as many iterations as the finished episode had steps
            if self.total_timesteps > start_timesteps:
                print("Training for steps: ", self.episode_timesteps)
                start_time = time.time()
                brain.train(replay_buffer, self.episode_timesteps, batch_size, discount, tau, policy_noise, noise_clip, policy_freq)
                print("Time in minutes: ", round((time.time() - start_time)/60))

            # Restart from a random position around the centre
            self.car.center[0] = self.center[0] + np.random.randint(-700,700)
            self.car.center[1] = self.center[1] + np.random.randint(-300,300)

            self.car.velocity = Vector(6, 0)
            xx = goal_x - self.car.x
            yy = goal_y - self.car.y
            orientation = Vector(*self.car.velocity).angle((xx,yy))/180.

            # First state of the new episode: one move with +/- orientation, chosen at random
            self.state = self.car.move(random.choice((-orientation,orientation)))

            self.done = False

            self.episode_reward = 0
            self.episode_timesteps = 0
            self.episode_num += 1
            self.sand_counter = 0

        action = brain.select_action(self.state)
        print("previous action:", action)
        if expl_noise != 0:
            action = (action + np.random.normal(0, expl_noise)).clip(-max_action, max_action)

        print("action clipped:", action)

        # The agent performs the action and reaches the next state
        new_state = self.car.move(action[0])

        distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)

        # Reward: -5 on sand, 2 on road, +10 on road when the goal got closer
        print("sanduu:", sand.shape)
        # NOTE(review): sand is padded by 80 cells in init() but indexed here
        # with the raw car position - confirm the offset is intended.
        on_sand = sand[int(self.car.x),int(self.car.y)] > 0
        if on_sand:
            self.car.velocity = Vector(0.5, 0).rotate(self.car.angle)
            print("Total Timesteps: {} Episode Num: {} Reward: {}".format(self.total_timesteps, self.episode_num, self.episode_reward))
            print("Sand", "distance: ", distance, int(self.car.x),int(self.car.y), im.read_pixel(int(self.car.x),int(self.car.y)))
            reward = -5
            self.done = False
        else:
            self.car.velocity = Vector(2, 0).rotate(self.car.angle)
            reward = 2
            print("Total Timesteps: {} Episode Num: {} Reward: {}".format(self.total_timesteps, self.episode_num, self.episode_reward))
            print("Road", "distance: ", distance, int(self.car.x),int(self.car.y), im.read_pixel(int(self.car.x),int(self.car.y)))
            if distance < last_distance:
                reward += 10

        # Crude way to handle the model failing near the boundaries: end the episode
        if (self.car.x < 41) or (self.car.x > self.width - 21) or (self.car.y < 41) or (self.car.y > self.height - 21):
            self.done = True
            reward += -0.5

        # Goal reached: reward it and switch to the other goal
        if distance < 50:
            reward += 10
            if swap == 1:
                goal_x = 1420
                goal_y = 622
                swap = 0
            else:
                goal_x = 9
                goal_y = 85
                swap = 1
        last_distance = distance

        self.episode_reward += reward

        # Episode length cap: 500 steps before training starts, 1000 afterwards
        if self.episode_timesteps == 500 and self.total_timesteps<start_timesteps:
            self.done = True
        if self.episode_timesteps == 1000 and self.total_timesteps>start_timesteps:
            self.done = True

        # End the episode after 200 consecutive steps on sand. The check used
        # to compare the reward with -1, a value no reward path produces, so
        # the counter never advanced; the sand test itself is used instead.
        if on_sand:
            self.sand_counter += 1
        else:
            self.sand_counter = 0
        if self.sand_counter == 200:
            reward += -3
            self.done = True

        # Store the transition in the replay buffer and move on to the new state
        replay_buffer.add((self.state, new_state, action, reward, self.done))
        self.state = new_state
        self.episode_timesteps += 1
        self.total_timesteps += 1

        # The log file is opened only when a line is written and closed right
        # away, instead of opening a new handle on every call.
        if self.total_timesteps % log_interval == 0:
            with open("log.txt", "a+") as log_f:
                log_f.write("Total Timesteps: {}\t Episode Num: {}\t Reward: {}\n".format(self.total_timesteps, self.episode_num, self.episode_reward))

        # Save the model whenever the running episode reward is at least 900
        if self.episode_reward >= 900:
            print("########## Model received ###########")
            brain.save(file_name, directory)

        # Periodic checkpoint every 100 steps
        if self.total_timesteps % 100 == 0:
            if not os.path.exists(directory):
                os.mkdir(directory)
            brain.save(file_name, directory)


class MyPaintWidget(Widget):
    """Widget that lets the user paint sand onto the map."""

    def on_touch_down(self, touch):
        """Start a stroke: draw its first point, mark one sand cell, save a snapshot."""
        global length, n_points, last_x, last_y
        with self.canvas:
            Color(0.8,0.7,0)
            touch.ud['line'] = Line(points = (touch.x, touch.y), width = 10)
            cell_x = int(touch.x)
            cell_y = int(touch.y)
            last_x, last_y = cell_x, cell_y
            n_points = 0
            length = 0
            sand[cell_x, cell_y] = 1
            snapshot = PILImage.fromarray(sand.astype("uint8")*255)
            snapshot.save("./images/sand.jpg")

    def on_touch_move(self, touch):
        """Extend the stroke with the left button and mark a 20x20 patch as sand."""
        global length, n_points, last_x, last_y
        if touch.button != 'left':
            return

        stroke = touch.ud['line']
        stroke.points += [touch.x, touch.y]
        x, y = int(touch.x), int(touch.y)

        # Every segment counts for at least sqrt(2), so length is never zero
        length += np.sqrt(max((x - last_x)**2 + (y - last_y)**2, 2))
        n_points += 1.
        density = n_points/(length)
        stroke.width = int(20 * density + 1)

        sand[x - 10 : x + 10, y - 10 : y + 10] = 1
        last_x, last_y = x, y

# Adding the API Buttons (clear, save and load)

class CarApp(App):
    """Application that wires the game, the paint widget and three buttons together."""

    def build(self):
        """Create the Game root widget, schedule its update and add the controls."""
        game = Game()
        game.serve_car()
        Clock.schedule_interval(game.update, 1.0/60.0)

        self.painter = MyPaintWidget()
        clearbtn = Button(text = 'clear')
        savebtn = Button(text = 'save', pos = (game.width, 0))
        loadbtn = Button(text = 'load', pos = (2 * game.width, 0))

        callbacks = (
            (clearbtn, self.clear_canvas),
            (savebtn, self.save),
            (loadbtn, self.load),
        )
        for button, callback in callbacks:
            button.bind(on_release = callback)

        # The painter is added first, then the buttons in clear/save/load order
        for child in (self.painter, clearbtn, savebtn, loadbtn):
            game.add_widget(child)
        return game

    def clear_canvas(self, obj):
        """Erase the painted strokes and replace the sand map with zeros."""
        global sand
        self.painter.canvas.clear()
        sand = np.zeros((longueur,largeur))

    def save(self, obj):
        """Save the agent's weights to the ``pytorch_models`` directory."""
        print("saving brain...")
        brain.save(file_name, "pytorch_models")

    def load(self, obj):
        """Load the agent's weights from the ``pytorch_models`` directory."""
        print("loading last saved brain...")
        brain.load(file_name, "pytorch_models")

# Start the Kivy application when this file is executed as a script
if __name__ == '__main__':
    CarApp().run()


