<a href="https://colab.research.google.com/github/Vishal8848/machine-learning/blob/main/machine_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Machine Learning Contents:

1. Creating Colab in **Drive**
2. Connecting Repo to **GitHub**
3. Making **Kaggle** Available Online
4. Lively Access to **Kaggle** Datasets
5. Extract Required Datasets
6. **Pandas** - Managing Dataframes
7. Creating **Fake** Datasets (for own use)
8. A **Real** Problem (_see below_)

In [None]:
#1 - Creating Colab in Drive
print('Google Colab Created in Drive')

Google Colab Created in Drive


In [None]:
#2 - Connecting Repo to GitHub
print('Connected to GitHub @ Vishal8848')

Connected to GitHub @ Vishal8848


In [2]:
#3 - Making Kaggle Available Online

# Installing Kaggle
! pip install -q kaggle

In [3]:
# Upload kaggle.json File
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"vp8848","key":"55fb49a30117a3600d9594f63ed1c9ae"}'}

In [4]:
# Create Kaggle Folder
! mkdir ~/.kaggle

In [5]:
# Copy kaggle.json File to Kaggle Folder
! cp kaggle.json ~/.kaggle

# Check If kaggle.json Exists
! ls -a ~/.kaggle

.  ..  kaggle.json


In [6]:
# Grant Permission to Access JSON
! chmod 600 ~/.kaggle/kaggle.json

In [7]:
#4 - Lively Access Kaggle Datasets

# List Availabe Datasets
! kaggle datasets list

ref                                                            title                                             size  lastUpdated          downloadCount  voteCount  usabilityRating  
-------------------------------------------------------------  -----------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
datasets/muratkokludataset/date-fruit-datasets                 Date Fruit Datasets                              408KB  2022-04-03 09:25:39            739        200  0.9375           
datasets/piterfm/2022-ukraine-russian-war                      2022 Ukraine Russia War                            2KB  2022-04-16 14:04:28           8711        485  1.0              
datasets/muratkokludataset/acoustic-extinguisher-fire-dataset  Acoustic Extinguisher Fire Dataset               621KB  2022-04-02 22:59:36             44        181  0.9375           
datasets/kamilpytlak/personal-key-indicators-of-heart-disease  Personal Key Indi

In [8]:
#5 - Extract Required Dataset - Video Game Sales (vgsales.csv)

# Download .zip File
! kaggle datasets download -d gregorut/videogamesales

# Unzip Dataset
! unzip videogamesales.zip

Downloading videogamesales.zip to /content
  0% 0.00/381k [00:00<?, ?B/s]
100% 381k/381k [00:00<00:00, 98.7MB/s]
Archive:  videogamesales.zip
  inflating: vgsales.csv             


In [12]:
#6 - Pandas: Managing Dataframes

import pandas as pd

# Read CSV
df = pd.read_csv('vgsales.csv')

print(df.shape) # print(df)

df.describe()

(16598, 11)


Unnamed: 0,Rank,Year,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
count,16598.0,16327.0,16598.0,16598.0,16598.0,16598.0,16598.0
mean,8300.605254,2006.406443,0.264667,0.146652,0.077782,0.048063,0.537441
std,4791.853933,5.828981,0.816683,0.505351,0.309291,0.188588,1.555028
min,1.0,1980.0,0.0,0.0,0.0,0.0,0.01
25%,4151.25,2003.0,0.0,0.0,0.0,0.0,0.06
50%,8300.5,2007.0,0.08,0.02,0.0,0.01,0.17
75%,12449.75,2010.0,0.24,0.11,0.04,0.04,0.47
max,16600.0,2020.0,41.49,29.02,10.22,10.57,82.74


In [11]:
# 2D Array to Represent Dataset
df.values

array([[1, 'Wii Sports', 'Wii', ..., 3.77, 8.46, 82.74],
       [2, 'Super Mario Bros.', 'NES', ..., 6.81, 0.77, 40.24],
       [3, 'Mario Kart Wii', 'Wii', ..., 3.79, 3.31, 35.82],
       ...,
       [16598, 'SCORE International Baja 1000: The Official Game', 'PS2',
        ..., 0.0, 0.0, 0.01],
       [16599, 'Know How 2', 'DS', ..., 0.0, 0.0, 0.01],
       [16600, 'Spirits & Spells', 'GBA', ..., 0.0, 0.0, 0.01]],
      dtype=object)

In [15]:
#8 - Creating Fake Datasets (for own use) - Musical Genre Predictor

#8.1 Create Custom Range
genre = [ "Jazz", "Classic", "Hip Hop", "Dance", "Acoustic" ]
gender = [ "Male", "Female" ]
age = { "min" : 15, "max" : 65 }

#8.2 Creating Columns - SNO, Age, Gender, Genre
dataset = []

# Randomly Assign Values
import random

for i in range(1000):
  rand_age = random.randint(age["min"], age["max"])
  rand_gender = random.choice(gender)
  rand_genre = random.choice(genre)
  datum = [i + 1, rand_age, rand_gender, rand_genre]
  dataset.append(datum)

# Creating the CSV
import csv

columns = [ "SNo", "Age", "Gender", "Genre" ]

filename = "musical_genre.csv"

with open(filename, "w") as datastore:

  # CSV File Pointer
  csvstore = csv.writer(datastore)

  # Writing Columns
  csvstore.writerow(columns)

  # Writing Data Rows
  csvstore.writerows(dataset)

print(dataset)

[[1, 56, 'Female', 'Acoustic'], [2, 61, 'Female', 'Classic'], [3, 15, 'Female', 'Acoustic'], [4, 62, 'Male', 'Classic'], [5, 65, 'Male', 'Hip Hop'], [6, 59, 'Female', 'Hip Hop'], [7, 21, 'Female', 'Dance'], [8, 38, 'Male', 'Hip Hop'], [9, 50, 'Female', 'Hip Hop'], [10, 23, 'Male', 'Dance'], [11, 49, 'Female', 'Jazz'], [12, 50, 'Female', 'Acoustic'], [13, 46, 'Female', 'Acoustic'], [14, 35, 'Male', 'Hip Hop'], [15, 33, 'Male', 'Classic'], [16, 21, 'Female', 'Hip Hop'], [17, 39, 'Female', 'Jazz'], [18, 43, 'Female', 'Hip Hop'], [19, 38, 'Male', 'Jazz'], [20, 46, 'Female', 'Jazz'], [21, 63, 'Male', 'Acoustic'], [22, 16, 'Female', 'Dance'], [23, 29, 'Female', 'Jazz'], [24, 15, 'Male', 'Hip Hop'], [25, 20, 'Female', 'Jazz'], [26, 24, 'Male', 'Classic'], [27, 52, 'Male', 'Classic'], [28, 27, 'Male', 'Hip Hop'], [29, 51, 'Male', 'Classic'], [30, 18, 'Male', 'Hip Hop'], [31, 50, 'Male', 'Jazz'], [32, 29, 'Female', 'Classic'], [33, 32, 'Male', 'Acoustic'], [34, 40, 'Female', 'Classic'], [35, 28

### A Real Problem - Music Genre Prediction
  - Import Data
  - Clean Data
  - Split Data into Training and Testing
  - Create an ML Model
  - Train the Model
  - Make Predictions
  - Evaluate and Improve

In [18]:
# Importing the Data
import pandas as pd

df = pd.read_csv("musical_genre.csv")

df

Unnamed: 0,SNo,Age,Gender,Genre
0,1,56,Female,Acoustic
1,2,61,Female,Classic
2,3,15,Female,Acoustic
3,4,62,Male,Classic
4,5,65,Male,Hip Hop
...,...,...,...,...
995,996,58,Female,Hip Hop
996,997,45,Male,Acoustic
997,998,39,Female,Classic
998,999,48,Male,Jazz
