In [None]:
from google.colab import drive       #This library is used for connecting google drive with colab
import pandas as pd                  #This library is imported for data analysis
import numpy as np                   #This library is used for working with arrays and performing various linear algebra operations

In [None]:
drive.mount('/content/gdrive')      # Mount the user's Google Drive at /content/gdrive so later cells can copy the datasets into it

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
def downloadHousingData():         # This function is used for download the Housing Data; This function has been called from load_Datasets() function
  !gdown --id 1xlJ0QiI22OSpbPKPWqTy5QeHP0_-KQUM    # gdown command is used for fast download of files from zip file
  import zipfile                                    # This library will deals with extracting zipfiles
  with zipfile.ZipFile("/content/housing_dataset.zip","r") as zip_ref:        
      zip_ref.extractall("/content")                #Extracting zip file content                            
  !cp /content/housing.csv "gdrive/MyDrive"         #Saving the extracted content in user's Google Drive.
  print("Housing Dataset has been succesfully downloaded and saved to your drive.")

In [None]:
def generateSyntheticData(no_of_samples=1000,no_of_features=4,noise_value=0.5,random_state=34):
  """Generate a synthetic regression dataset, save it as CSV, and copy it to Google Drive.

  Called from load_Datasets().

  Args:
    no_of_samples: passed to make_regression as n_samples.
    no_of_features: passed to make_regression as n_features.
    noise_value: passed to make_regression as noise.
    random_state: passed to make_regression as random_state. Defaults to 34, the value
      that was previously hard-coded, so existing calls produce the same data.

  Side effects:
    Writes /content/linear_regression_synthetic_data.csv and copies it to
    /content/gdrive/MyDrive/linear_regression_synthetic_data.csv.

  Raises:
    FileNotFoundError / OSError: if either file cannot be written (for example when the
      Drive is not mounted at /content/gdrive).
  """
  import shutil                                     # Pure-Python file copy that raises on failure
  import sklearn.datasets as datasts

  X, y = datasts.make_regression(n_samples=no_of_samples, n_features=no_of_features,
                                 noise=noise_value, random_state=random_state)

  # Rows are labelled "1", "2", ... and feature columns "x_1", "x_2", ...
  row_labels = [str(i + 1) for i in range(X.shape[0])]
  column_labels = ['x_' + str(j + 1) for j in range(X.shape[1])]
  data = pd.DataFrame(data=X, index=row_labels, columns=column_labels)
  data['y'] = y                                     # Append the target as the last column

  # Write and copy through absolute paths so the file that is written is the file that is
  # copied, regardless of the current working directory.
  local_csv = "/content/linear_regression_synthetic_data.csv"
  data.to_csv(local_csv)

  # shutil.copy raises on failure, so the success message is only printed when the file
  # actually reached the Drive folder (a failing `!cp` would have gone unnoticed).
  shutil.copy(local_csv, "/content/gdrive/MyDrive/linear_regression_synthetic_data.csv")
  print("Synthetic Dataset has been succesfully generated and saved to your drive.")

In [None]:
def load_Datasets():
  """Interactive menu: download the housing dataset or generate a synthetic one.

  Prompts for a choice on standard input:
    "1" -> calls downloadHousingData().
    "2" -> asks for the number of samples, the number of features and the noise value,
           then calls generateSyntheticData() with them.
  Any other choice, or a non-numeric answer to one of the option-2 prompts, prints an
  explanatory message and returns without doing anything (previously an unknown choice
  was ignored silently and a bad number surfaced as a raw ValueError traceback).

  Returns:
    None.
  """
  print("Enter 1 to download Housing Dataset")
  print("Enter 2 to generate Synthetic Dataset")
  inp = input("Enter your choice:").strip()           # Tolerate stray spaces around the choice

  if inp == "1":
    downloadHousingData()
  elif inp == "2":
    # Only the parsing is guarded, so errors raised by the generator itself still propagate.
    try:
      no_of_samples = int(input("Enter your the number of data samples:"))
      no_of_features = int(input("Enter your the number of features:"))
      noise_value = float(input("Enter your the noise value:"))
    except ValueError as err:
      print("Invalid number entered; no dataset was generated:", err)
      return
    generateSyntheticData(no_of_samples,no_of_features,noise_value)
  else:
    print("Invalid choice " + repr(inp) + ": please enter 1 or 2.")

In [None]:
load_Datasets()            # Run the interactive menu: choose between downloading the Housing Dataset and generating a Synthetic Dataset

Enter 1 to download Housing Dataset
Enter 2 to generate Synthetic Dataset
Enter your choice:1
Downloading...
From: https://drive.google.com/uc?id=1xlJ0QiI22OSpbPKPWqTy5QeHP0_-KQUM
To: /content/housing_dataset.zip
100% 409k/409k [00:00<00:00, 6.39MB/s]
Housing Dataset has been succesfully downloaded and saved to your drive.
