<a href="https://colab.research.google.com/gist/dbalint95/ea3ef463061e999a6d96ae0a7370c350/balint_david_h214z0_dl_small_homework_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
"""
Author: Balint David
Neptun code: H214Z0
BME Deep Learning Course, Small homework I.

Note: Every input file (images and audio file) should be located in the folder where the script is called from.
"""

import bs4  # for webpage handling
import os  # for general maneuvers between directories
import urllib.request  # for webpage opening
import numpy as np  # for data handling
import pandas as pd  # for data handling

from matplotlib import pyplot as plt  # for visualizations
from PIL import Image  # for image parsing
from scipy import stats  # for standardization
from scipy import signal  # for audio file handling
from scipy.io import wavfile  # for audio file handling

In [0]:
""" 1. and 2. sub-task """

# For every PNG image in the current working directory: print per-channel
# (red/green/blue) raw data, mean and standard deviation, then the same
# statistics for the standardized (z-scored) channel, and finally plot the image.
working_dir = os.getcwd()
# Sorted so that the output order is deterministic (os.listdir order is arbitrary).
for file in sorted(os.listdir(working_dir)):
    # Match the extension case-insensitively so that e.g. "photo.PNG" is found too.
    if file.lower().endswith(".png"):
        print("Image found: {}".format(file))

        # Open the image through its full path and make sure the file handle is released.
        with Image.open(os.path.join(working_dir, file)) as image:
            image.load()  # Read the pixel data while the file is still open
            # Grayscale ("L") and palette ("P") PNGs decode to 2-D arrays, and RGBA ones
            # carry an extra opacity channel. Converting to RGB guarantees a
            # (height, width, 3) array, so the channel slicing below cannot fail.
            image_data = np.asarray(image.convert("RGB"), dtype="int32")

        # Separate data of RGB channels
        rgb_channels = {'red': image_data[:, :, 0],
                        'green': image_data[:, :, 1],
                        'blue': image_data[:, :, 2]}

        # Iterate through the RGB channels
        for color, channel_data in rgb_channels.items():
            print("Channel data of {}: {}".format(color, channel_data))

            channel_mean = np.mean(channel_data)  # Mean of the current R/G/B channel data
            channel_std = np.std(channel_data)  # Standard deviation of the current R/G/B channel data
            print("Mean of {} channel data: {}".format(color, channel_mean))
            print("Standard deviation of {} channel data: {}".format(color, channel_std))

            # Standardize over all pixels of the channel at once (hence the flatten).
            # Note: a completely constant channel has zero standard deviation,
            # so its z-scores are NaN.
            channel_data_standardized = stats.zscore(channel_data.flatten())
            print("Standardized channel data of {}: {}".format(color, channel_data_standardized))
            print("Mean of {} standardized channel data: {}".format(
                color, np.mean(channel_data_standardized)))  # Should be zero or very close to zero
            print("Standard deviation of {} standardized channel data: {}".format(
                color, np.std(channel_data_standardized)))  # Should be one or very close to one

        fig = plt.figure()  # Create new figure for each image
        plt.imshow(image)  # Plot the original (unconverted) image
        plt.title(file)  # Label the figure so it can be matched to the printed statistics
plt.show()  # Show plots

In [0]:
""" 3. sub-task """

# Download a web page, extract its visible text and plot how often each letter occurs.
link = "https://blog.keras.io/the-future-of-deep-learning.html"  # Link to be read as parameter

# Read the page inside a context manager so the HTTP connection is always closed.
with urllib.request.urlopen(link) as response:
    # Decode the raw bytes into text. Calling str() on bytes would yield the "b'...'"
    # repr, in which escape sequences such as \n or \xe2 become literal characters
    # and would be counted as letters.
    charset = response.headers.get_content_charset() or "utf-8"
    webpage = response.read().decode(charset, errors="replace")

soup = bs4.BeautifulSoup(webpage, features="html.parser")  # Parse the HTML document
webpage_text = soup.get_text()  # Get text of the website
print("Text of the webpage: {}\n{}".format(link, webpage_text))  # Print the text

char_list = list(webpage_text)  # Separate each character into a list
# dtype=object keeps the .str accessor usable even if the page text is empty.
ser = pd.Series(char_list, dtype="object")
ser = ser.str.lower()  # Make every character lower-case (if possible)
ser = ser[ser.str.isalpha()]  # Keep only the characters that are letters
# Count the occurrences of each letter; value_counts() already returns the counts
# sorted in descending order.
ser = ser.value_counts()

fig = plt.figure()  # Create new figure
plt.bar(ser.index, ser, width=0.5, color='g')  # Bar chart of the letter frequencies
plt.xlabel('Letter')  # Set x label
plt.ylabel('Number of occurrences')  # Set y label
plt.show()  # Show the plot

In [0]:
""" 4. sub-task """

# Read a WAV file and plot its log-scaled spectrogram.
sample_rate, samples = wavfile.read('audio_1.wav')  # Returns the sample rate and the samples

# Multi-channel audio is returned as (n_samples, n_channels), while
# signal.spectrogram works along the last axis. Mix down to mono so that the
# transform is taken over time instead of over the channels.
if samples.ndim > 1:
    samples = samples.mean(axis=1)

frequencies, times, spectrogram = signal.spectrogram(samples, sample_rate)  # Compute the spectrogram

# Take the natural logarithm of the spectrogram for better visualization.
# Bins with exactly zero power are clamped to the smallest positive float first,
# otherwise log(0) would give -inf (and a runtime warning); non-zero bins are unchanged.
power_floor = np.finfo(spectrogram.dtype).tiny
plt.pcolormesh(times, frequencies, np.log(np.maximum(spectrogram, power_floor)))
plt.ylabel('Frequency [Hz]')  # Set y label
plt.xlabel('Time [sec]')  # Set x label
plt.show()  # Show the plot