In [4]:
import pandas as pd
import numpy as np
import pretty_midi as pm
import matplotlib.pyplot as plt
import librosa
import math
import jdc
import itertools

In [21]:
class Sonify:
  """Load solar-wind CSV data and a MIDI file and prepare grain tables for sonification.

  The class defines no __init__; attributes are created by the methods:
    df, density_max, speed_max -- set by read_data
    midi_grain_start           -- set by read_midi
    grains_data                -- set by granulate (requires df)
  """

  def read_data(self, path):
    """Read the csv file at `path` into self.df, clean it and add derived columns.

    Adds the columns "cumulative_secs", "kp_equiv" and "kp_diff", then plots
    and prints the result for inspection.

    Raises:
      FileNotFoundError: if `path` does not end with ".csv".
    """
    if not path.endswith(".csv"): # Reject anything that is not named like a csv file
      print("Invalid file path. Must be .csv file.")
      raise FileNotFoundError(path)
    self.df = pd.read_csv(path)

    # Constants used to build the cumulative seconds column
    entries_per_day = 12*24 # 12 samples an hour (every 5 mins), 24 hours in a day
    seconds_per_day = 60*60*24

    # Maxima used to scale density and speed for the Kp comparison value.
    # Taken before cleanup: error values lie below the thresholds, so they cannot be the maximum.
    self.density_max = np.max(self.df["proton_density"])
    self.speed_max = np.max(self.df["speed"])

    # (column, threshold) pairs: a value below its threshold is treated as an error reading
    cleanup_thresholds = (
      ("proton_density", 0),
      ("speed", 0),
      ("ion_temp", 0),
      ("bz", -10),
      ("phi_angle", 0),
    )

    for i, row in self.df.iterrows():
      # Data cleanup: error cells are replaced from their valid neighbours
      for column_title, threshold in cleanup_thresholds:
        self.__cleanup_column(column_title, row, i, threshold=threshold)

      # Seconds since the start of the dataset
      self.__cumulative_seconds(i, entries_per_day, seconds_per_day)

      # Synthetic Kp-like variable and its difference to the real Kp index
      self.__kp_comparison(i, row) # Not working fully yet

    # Just for testing...
    debug_plots = (
      ("kp_index", "KP Index over time"),
      ("kp_equiv", "KP Equiv over time"),
      ("kp_diff", "KP difference over time"),
    )
    for column_title, title in debug_plots:
      plt.plot(self.df["cumulative_secs"], self.df[column_title])
      plt.title(title)
      plt.show()

    print(self.df)

  def __cumulative_seconds(self, i, entries_per_day, seconds_per_day):
    """Store in "cumulative_secs" the seconds elapsed since the start of the dataset for row `i`.

    The day number is derived from the row position, so this assumes every day
    contributes exactly `entries_per_day` rows.
    """
    current_day_in_dataset = i // entries_per_day # Whole days that precede this row
    self.df.at[i, "cumulative_secs"] = (current_day_in_dataset*seconds_per_day) + self.df.at[i, "sec_of_day"]

  def __kp_comparison(self, i, row):
    """Write the synthetic "kp_equiv" value and its difference to the Kp index ("kp_diff") for row `i`.

    `row` is the row as yielded by iterrows(); only its "kp_index" entry is read.
    """
    density_scaled = self.df.at[i, "proton_density"]/self.density_max # Density relative to the dataset maximum
    speed_scaled = self.df.at[i, "speed"]/self.speed_max # Speed relative to the dataset maximum

    # NOTE(review): multiplication binds tighter than addition, so only the speed
    # term is multiplied by 9 -- confirm this weighting is intended.
    # TODO: phi_angle is not part of the synthetic value yet.
    kp_equiv = density_scaled + speed_scaled * 9
    self.df.at[i, "kp_equiv"] = kp_equiv
    self.df.at[i, "kp_diff"] = row["kp_index"] - kp_equiv

  def __cleanup_column(self, column_title, row, i, threshold=0):
    """Replace an error value in row `i` of `column_title` using its valid neighbours.

    A value below `threshold` counts as an error. It is replaced by the average
    (rounded to one decimal) of the previous cell and the next non-error cell.
    If only one of those neighbours exists (error in the first row, or no valid
    value further down) that neighbour is used alone; if neither exists the cell
    is left unchanged.
    """
    if not row[column_title] < threshold: # Nothing to do for valid data
      return

    next_valid_value = self.__find_next_non_error_cell(i, column_title, threshold)
    previous_value = self.df.at[i-1, column_title] if i > 0 else None # The first row has no predecessor

    if previous_value is not None and next_valid_value is not None:
      replacement = (previous_value + next_valid_value)*0.5
    elif next_valid_value is not None:
      replacement = next_valid_value
    elif previous_value is not None:
      replacement = previous_value
    else:
      return
    self.df.at[i, column_title] = round(replacement, 1)

  def __find_next_non_error_cell(self, i, column_title, threshold):
    """Return the first value after row `i` in `column_title` that is not an error.

    A value is valid when it is not below `threshold`, mirroring the error test
    in __cleanup_column. Returns None when no valid value follows row `i`.
    Rows are addressed by integer label, i.e. this relies on the default
    0..n-1 index.
    """
    # Iterative scan: a long run of error rows cannot exhaust the recursion limit
    for label in range(i + 1, len(self.df.index)):
      candidate = self.df.at[label, column_title]
      if candidate >= threshold:
        return candidate
    return None

  def read_midi(self, path):
    """Load the midi file at `path` and store grain start times in self.midi_grain_start.

    The grain start times are evenly spaced from 0 to the last beat time, with
    eight points per detected beat.

    Raises:
      FileNotFoundError: if `path` does not end with ".mid".
    """
    if not path.endswith(".mid"): # Validate before attempting to load the file
      print("Invalid file path. Must be .mid file.")
      raise FileNotFoundError(path)

    midi_data = pm.PrettyMIDI(path)
    print('Fileload successful.')

    print (f'Estimated tempo of the file: {midi_data.estimate_tempo()}')

    segments = np.array(midi_data.get_beats(start_time=0.0))
    print((segments))
    print(segments[-1])
    self.midi_grain_start = np.linspace(0, segments[-1], len(segments)*8)
    print(len(self.midi_grain_start))

  def granulate(self, corpus):
    """Build self.grains_data, a table with one row per grain of every song in `corpus`.

    Each song (an array exposing `.size`) is cut into as many equal-length grains
    as self.df has rows. Columns:
      song_no       -- position of the song in `corpus`
      grain_in_song -- grain number within that song
      grain_pos     -- [start, end] sample indices of the grain (end exclusive)
    Leftover samples at the end of a song that do not fill a whole grain are not
    covered by any grain.
    """
    total_grains_in_song = len(self.df.index) # Number of grains per song (just the length of the dataset)
    grain_rows = []

    if total_grains_in_song > 0: # An empty dataset yields an empty table instead of a division by zero
      for song_no, song in enumerate(corpus):
        grain_len_samp = song.size // total_grains_in_song # The length of each grain in samples
        for grain_no in range(total_grains_in_song):
          grain_start_index = grain_no * grain_len_samp
          grain_rows.append({
            "song_no": song_no,
            "grain_in_song": grain_no,
            "grain_pos": [grain_start_index, grain_start_index + grain_len_samp],
          })
          # Any Librosa features could go here

    self.grains_data = pd.DataFrame(grain_rows, columns=["song_no", "grain_in_song", "grain_pos"])
        
      

In [None]:
# Use this syntax (the %%add_to cell magic) to add methods to the Sonify class from other cells

%%add_to Sonify
def function(self):
  """Placeholder showing the shape of a method to attach to Sonify; it does nothing."""
  pass

In [22]:
# Create the sonification object; no data is loaded until a read_* method is called.
sonify = Sonify()
# CSV loading is currently disabled (the path looks like a mounted Google Drive folder -- adjust before re-enabling).
#sonify.read_data("/content/drive/MyDrive/Python Assignment 4 Depot/solar_wind_data_2003-10-27 - 2003-11-02_ACTUAL.csv")
# Load the MIDI file; read_midi prints the estimated tempo, the beat times and the number of grain start points.
sonify.read_midi("corpus/02_Dido White Flag_adjusted.mid")

Fileload successful.
Estimated tempo of the file: 228.34817581154812
[  0.    0.5   1.    1.5   2.    2.5   3.    3.5   4.    4.5   5.    5.5
   6.    6.5   7.    7.5   8.    8.5   9.    9.5  10.   10.5  11.   11.5
  12.   12.5  13.   13.5  14.   14.5  15.   15.5  16.   16.5  17.   17.5
  18.   18.5  19.   19.5  20.   20.5  21.   21.5  22.   22.5  23.   23.5
  24.   24.5  25.   25.5  26.   26.5  27.   27.5  28.   28.5  29.   29.5
  30.   30.5  31.   31.5  32.   32.5  33.   33.5  34.   34.5  35.   35.5
  36.   36.5  37.   37.5  38.   38.5  39.   39.5  40.   40.5  41.   41.5
  42.   42.5  43.   43.5  44.   44.5  45.   45.5  46.   46.5  47.   47.5
  48.   48.5  49.   49.5  50.   50.5  51.   51.5  52.   52.5  53.   53.5
  54.   54.5  55.   55.5  56.   56.5  57.   57.5  58.   58.5  59.   59.5
  60.   60.5  61.   61.5  62.   62.5  63.   63.5  64.   64.5  65.   65.5
  66.   66.5  67.   67.5  68.   68.5  69.   69.5  70.   70.5  71.   71.5
  72.   72.5  73.   73.5  74.   74.5  75.   75.5  76.  



186.29221418559854
[  0.        0.71428   1.42856   2.14284   2.85712   3.5714    4.28568
   4.99996   5.71424   6.42852   7.1428    7.85708   8.57136   9.28564
   9.99992  10.7142   11.42848  12.14276  12.85704  13.57132  14.2856
  14.99988  15.71416  16.42844  17.14272  17.857    18.57128  19.28556
  19.99984  20.71412  21.4284   22.14268  22.85696  23.57124  24.28552
  24.9998   25.71408  26.42836  27.14264  27.85692  28.5712   29.28548
  29.99976  30.71404  31.42832  32.1426   32.85688  33.57116  34.28544
  34.99972  35.714    36.42828  37.14256  37.85684  38.57112  39.2854
  39.99968  40.71396  41.42824  42.14252  42.8568   43.57108  44.28536
  44.99964  45.71392  46.4282   47.14248  47.85676  48.57104  49.28532
  49.9996   50.71388  51.42816  52.14244  52.85672  53.571    54.28528
  54.99956  55.71384  56.42812  57.1424   57.85668  58.57096  59.28524
  59.99952  60.7138   61.42808  62.14236  62.85664  63.57092  64.2852
  64.99948  65.71376  66.42804  67.14232  67.8566   68.57088 