In [None]:
# Install dependencies
!pip install ffmpy ffmpeg pydub spleeter librosa pytube

In [None]:
import os, time
import shutil
import subprocess
from functools import reduce

import IPython.display as ipd
import librosa
import matplotlib.pyplot as plt
import numpy as np
import scipy
import seaborn as sns
import spleeter
from ffmpy import FFmpeg
from pydub import AudioSegment
from pytube import YouTube, Playlist
from pytube.exceptions import VideoUnavailable
from scipy.io import wavfile
from scipy.signal import get_window
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier


%matplotlib inline

In [None]:
# Data preprocessing: 
# 1. download song from youtube
# 2. change the format to wav
# 3. isolate the vocals
# 4. cut the song into 6-second slices and label them as real or artificial
# 5. take the mfcc of these slices

In [None]:
# Helper functions for filepaths

# Gets relative paths of a folder's contents
def filepaths(folder):
    """Return the paths of the visible entries directly inside ``folder``.

    Each returned path is ``folder`` joined with the entry name, so the
    result is relative whenever ``folder`` is relative. Entries whose name
    starts with "." are skipped.

    The result is sorted by entry name: ``os.listdir`` returns names in
    arbitrary order, and sorting keeps the processing order identical
    between runs and machines.

    Args:
        folder: path of the directory to list.

    Returns:
        List of path strings, one per visible entry.

    Raises:
        OSError: whatever ``os.listdir`` raises when ``folder`` cannot be
            listed (for example when it does not exist).
    """
    return [
        os.path.join(folder, filename)
        for filename in sorted(os.listdir(folder))
        if not filename.startswith(".")
    ]

# Relative filepaths of isolated vocals
def get_folder(folder):
    """Return the non-accompaniment files found one level below ``folder``.

    Every visible subfolder of ``folder`` is listed with ``filepaths`` and
    each file whose *file name* does not contain "accompaniment" is kept.
    NOTE(review): presumably ``folder`` holds one subfolder per song as
    written by the vocal-isolation step -- confirm.

    Only the file name is tested, not the whole path, so a folder or song
    whose name happens to contain "accompaniment" does not hide its files.
    Visible entries of ``folder`` that are not directories are skipped
    instead of making the listing fail.

    Args:
        folder: path of the directory that contains the per-song subfolders.

    Returns:
        List of path strings (``folder``/subfolder/file).
    """
    paths = []
    for subfolder in filepaths(folder):
        if not os.path.isdir(subfolder):
            continue
        for path in filepaths(subfolder):
            if "accompaniment" not in os.path.basename(path):
                paths.append(path)
    return paths

In [None]:
# Download the songs from youtube
# format will be mp4

artificial_pl = Playlist("https://www.youtube.com/playlist?list=PLB02wINShjkBKnLfufaEPnCupGO-SK6e4") 
human_pl     = Playlist("https://www.youtube.com/playlist?list=PLenUt8-yT90h031pCIMKJLw49uZ_2b62L")

# Number of videos taken from the start of each playlist.
VIDEOS_PER_PLAYLIST = 15
# itag of the stream that is downloaded.
# NOTE(review): presumably an audio-only mp4 stream -- confirm.
AUDIO_ITAG = 139

# TODO: get the length of the RawAudio dirs and start from there so that not
# every video needs to be checked.

# Each playlist is handled on its own (instead of zipping the two together)
# so that an unavailable video in one playlist does not skip a video of the
# other, and so that both raw-audio folders read by the conversion step get
# filled: "0RawAudio/" with the artificial songs, "1RawAudio/" with the human ones.
for playlist, output_path in ((artificial_pl, "0RawAudio/"), (human_pl, "1RawAudio/")):
    for video in playlist.videos[:VIDEOS_PER_PLAYLIST]:
        try:
            stream = video.streams.get_by_itag(AUDIO_ITAG)
        except VideoUnavailable:
            print('Video is unavailable, skipping.')
            continue
        if stream is None:
            # get_by_itag found no stream with the requested itag for this video
            print(f"No stream with itag {AUDIO_ITAG} available, skipping.")
            continue
        stream.download(output_path=output_path)
        print("Downloaded " + stream.title)


In [None]:
# Convert to wav
def convert_to_wav(folder, output_dir):
    """Convert every listed audio file to a 2-channel WAV file using ffmpeg.

    Args:
        folder: iterable of input file paths (despite its name this is a
            list of files, not a directory path).
        output_dir: directory that receives one ``<input name>.wav`` per
            input file; it is created when it does not exist yet.

    ffmpeg is started once per file. A non-zero exit status is reported with
    a message and the remaining files are still converted.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    for song in folder:
        # splitext removes only the extension, so dots inside a title
        # ("Mr. X.mp4") no longer cut the output name short.
        song_name = os.path.splitext(os.path.basename(song))[0]
        output_name = os.path.join(output_dir, song_name + ".wav")
        print(song + " -> " + output_name)
        # An argument list bypasses the shell: titles containing quotes, "$"
        # or backticks are passed to ffmpeg unchanged.
        result = subprocess.run(
            ["ffmpeg", "-i", song, "-ac", "2", "-f", "wav", output_name]
        )
        if result.returncode != 0:
            print(f"ffmpeg exited with status {result.returncode} for {song}")

In [None]:
# Convert the raw audio of both classes into their WAV folders
for raw_dir, wav_dir in (("0RawAudio", "0Wav/"), ("1RawAudio", "1Wav/")):
    convert_to_wav(filepaths(raw_dir), wav_dir)

In [None]:
# Module-level accumulators filled by the functions defined below.
# features: get_mfcc() appends one MFCC array per file it is given.
features = []
# labels: split() extends this list with 259 copies of the class label for
# every slice it exports.
labels   = []

# Take the wav-files and isolate the vocals
def isolate(songs, output_dir):
    """Run ``spleeter separate`` on every file in ``songs``.

    Args:
        songs: iterable of audio file paths.
        output_dir: value passed to spleeter's ``-o`` option.
            NOTE(review): presumably spleeter writes one subfolder per song
            below this directory -- confirm against the spleeter version in use.

    The command is printed before it runs. A non-zero exit status is
    reported with a message and the remaining songs are still processed.

    Raises:
        FileNotFoundError: if the ``spleeter`` executable cannot be found.
    """
    for song in songs:
        # An argument list bypasses the shell, so file names containing
        # quotes or other shell metacharacters are handled correctly.
        command = ["spleeter", "separate", "-o", output_dir, song]
        print(" ".join(command))
        result = subprocess.run(command)
        if result.returncode != 0:
            print(f"spleeter exited with status {result.returncode} for {song}")

def split(songs, label, output_dir="AudioSplices"):
    """Cut every song into consecutive 6-second WAV slices and label them.

    Slices are taken back to back from the start of each song for as long as
    a full 6-second slice fits, so songs of different length simply yield a
    different number of slices. A slice that ends exactly at the end of the
    song is kept.

    Args:
        songs: list of song file paths. Every path must have the form
            ``<folder>/<song name>/<file>``: the second path component is
            used as the prefix of the slice file names. All songs are
            expected to be of one type.
        label: class label of these songs, 0 for artificial, 1 for real.
        output_dir: directory that receives the slices; created if missing.

    Side effects:
        Writes ``<song name><start>-<end>.wav`` (times in milliseconds) into
        ``output_dir`` and extends the module-level ``labels`` list with 259
        copies of ``label`` per slice.
    """
    slice_ms = 6000
    # Number of label entries added per slice; the original notes state that
    # the MFCC array of one 6-second slice has 259 entries along the frame axis.
    frames_per_slice = 259

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for song in songs:
        song_name = song.split("/")[1]
        # Read the duration and decode the audio once per song rather than
        # once per slice.
        duration_ms = librosa.get_duration(path=song) * 1000
        audio = AudioSegment.from_wav(song)

        start = 0
        while start + slice_ms <= duration_ms:
            end = start + slice_ms
            # Plain file name: shell-style quotes would become literal
            # characters of the exported file name.
            slice_path = os.path.join(output_dir, f"{song_name}{start}-{end}.wav")
            audio[start:end].export(slice_path, format="wav")
            # Every slice is a new datapoint: one label per MFCC frame
            labels.extend([label] * frames_per_slice)
            start = end
        

def get_mfcc(songs):
    """Compute the MFCCs of every audio file in ``songs``.

    Each file is loaded with ``librosa.load``; its 32-coefficient MFCC array
    (Hann window) is appended to the module-level ``features`` list.
    Nothing is returned.
    """
    for audio_path in songs:
        samples, sample_rate = librosa.load(audio_path)
        features.append(
            librosa.feature.mfcc(
                y=samples,
                sr=sample_rate,
                n_mfcc=32,
                window=scipy.signal.windows.hann,
            )
        )

In [None]:
# Start from an empty slice folder and empty accumulators so that re-running
# this cell does not append a second copy of every feature and label.
shutil.rmtree("AudioSplices", ignore_errors=True)
os.makedirs("AudioSplices")
features.clear()
labels.clear()

isolate(filepaths("0Wav"), "0Isolated/")
isolate(filepaths("1Wav"), "1Isolated/")

# split() writes the slices of both classes into the same "AudioSplices"
# folder and appends the labels class by class. The MFCCs therefore have to
# be computed class by class as well, otherwise the order of `features`
# (directory order) would not match the order of `labels`.
split(get_folder("0Isolated"), 0)
artificial_slices = filepaths("AudioSplices")
get_mfcc(artificial_slices)

split(get_folder("1Isolated"), 1)
already_processed = set(artificial_slices)
human_slices = [path for path in filepaths("AudioSplices") if path not in already_processed]
get_mfcc(human_slices)

# Every label must belong to exactly one MFCC frame.
n_frames = sum(mfcc.shape[1] for mfcc in features)
assert n_frames == len(labels), f"{n_frames} MFCC frames but {len(labels)} labels"

In [None]:
def generate_confusion_matrix(y_true, y_pred):
    """Draw the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    The heatmap is drawn with seaborn on the axes returned by
    ``plt.subplot()``; the cell counts are annotated and the axes get a
    title and axis labels. Nothing is returned.
    """
    axes = plt.subplot()
    matrix = confusion_matrix(y_true, y_pred)
    sns.heatmap(matrix, annot=True, fmt='g', ax=axes)

    text_size = 15
    axes.set_title('Confusion Matrix', fontsize=text_size)
    axes.set_xlabel('Predicted labels', fontsize=text_size)
    axes.set_ylabel('True labels', fontsize=text_size)

In [None]:
# Fit and test logistic regression model

# Join all MFCC arrays along axis 1 in a single call (a pairwise reduce copies
# the growing array once per element). After the transpose below, every row
# of the design matrix is one column of X.
X = np.concatenate(features, axis=1)
y = np.array(labels)
assert X.shape[1] == len(y), f"{X.shape[1]} feature columns but {len(y)} labels"

# 80/20 split into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X.T, y, test_size=0.20, random_state=10)

regr = LogisticRegression(max_iter=100000)
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)
# accuracy_score uses 1/0 loss in its implementation
accuracy = accuracy_score(y_test, y_pred)
generate_confusion_matrix(y_test, y_pred)
# plt.show must be called; the bare name is a no-op expression
plt.show()
print(f"Accuracy score: {100*accuracy:.2f}%")

In [None]:
# Fit and test k-nearest-neighbours model on the same train/test split.
# n_neighbors was left blank, which is a syntax error; 5 is scikit-learn's
# default. NOTE(review): confirm or tune this value.
knn = KNeighborsClassifier(n_neighbors=5, weights="distance")
knn.fit(X_train, y_train)
knn_y_pred = knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_y_pred)
generate_confusion_matrix(y_test, knn_y_pred)
# plt.show must be called; the bare name is a no-op expression
plt.show()
print(f"Accuracy score: {100*knn_accuracy:.2f}%")