# Script to create violin plot images


## Imports

In [None]:
from IPython.display import clear_output
!pip install sktime
clear_output()

In [None]:
import pandas as pd
import numpy as np
import seaborn as sea
import matplotlib.pyplot as plt
import matplotlib
import os
import gc

from PIL import Image
from sktime.datatypes._panel._convert import from_nested_to_2d_np_array
from sktime.datasets import load_from_tsfile_to_dataframe


## Functions

In [None]:
def imageConfigurationsViolin( color ):
  """Build the styling dictionaries for the coloured or black-and-white theme.

  Parameters
  ----------
  color : bool
      Truthy selects the coloured theme; falsy selects the black-and-white one.

  Returns
  -------
  tuple of (dict, dict, dict)
      ``PROPS``      -- extra keyword arguments for the plotting call,
      ``rc``         -- rc overrides (axes / figure background colours),
      ``flierprops`` -- marker settings for outlier points.
      Fresh dictionaries are created on every call.
  """

  ######### Color #########
  if color:
    # NOTE: the original literal listed 'bodies' twice; in a dict literal the
    # last value wins, so only {'colors': 'red'} was ever returned. The dead
    # first entry ({'facecolors': 'red'}) has been removed; the result is unchanged.
    PROPS = {
        'bodies':{'colors':'red'},
        'cmeans':{'facecolors':'green'},
        'cmins':{'facecolor':'blue'},
        'cmaxes':{'edgecolor':'yellow'},
        'cbars':{'colors':'green'},
        'cmedians':{'colors':'blue'},
        'cquantiles':{'colors':'yellow'}
    }

    rc = {
        'axes.facecolor':'black',
        'axes.edgecolor':'red',
        'figure.facecolor':'black'
    }

    flierprops = dict(marker='o', markerfacecolor='r', markersize=1,
                      linestyle='none', markeredgecolor='r')

  ######### Black and white #########
  else:
    PROPS = {
        'boxprops':{'facecolor':'black', 'edgecolor':'white'},
        'medianprops':{'color':'white'},
        'whiskerprops':{'color':'white'},
        'capprops':{'color':'white'}
    }

    rc = {
        'axes.facecolor':'black',
        'figure.facecolor':'black'
    }

    flierprops = dict(marker='o', markerfacecolor='w', markersize=1,
                      linestyle='none', markeredgecolor='w')

  return PROPS, rc, flierprops

In [None]:
def _styleViolinAxes( ax, grid ):
  """Apply the grid setting and the white ('1') tick / spine styling to `ax`."""
  ax.grid(grid)
  for axisName in ('x', 'y'):
    ax.tick_params(axis=axisName, colors='1', labelsize=8)
  for side in ('left', 'right', 'bottom', 'top'):
    ax.spines[side].set_color('1')


def datasetToViolinplot (
    dataset,
    name = "",
    isTrain = False,
    path = "",
    color = False,
    pureBlackAndWhite = False,
    numberOfViolinplots = 10,
    grid = False,
    inner = "box"
  ):
  """Render every time series of `dataset` as a violin-plot PNG image.

  Each series is given a one-second-per-sample timedelta index. The index is
  rounded to the nearest multiple of ``series length // numberOfViolinplots``
  seconds, and all samples sharing a rounded value are drawn as one violin.
  Images are written to ``<path>/<name>/<TRAIN|TEST>/<name><i>.png`` where
  ``i`` is the position of the series in `dataset`.

  Parameters
  ----------
  dataset : iterable
      Iterable of 1-D arrays exposing ``.size`` (e.g. the rows of a 2-D
      NumPy array).
  name : str
      Dataset name; used as sub-folder and as file-name prefix.
  isTrain : bool
      Selects the "TRAIN" (True) or "TEST" (False) sub-folder.
  path : str
      Root output folder.
  color : bool
      Forwarded to ``imageConfigurationsViolin`` to pick the theme.
  pureBlackAndWhite : bool
      Not used by this function; kept so existing callers keep working.
  numberOfViolinplots : int
      Approximate number of violins per image.
  grid : bool
      Whether to draw the axes grid.
  inner : str or None
      Forwarded to ``sea.violinplot`` as ``inner``.

  Returns
  -------
  None
  """

  trainOrTest = "TRAIN" if isTrain else "TEST"
  # os.path.join instead of manual "/" concatenation: with the default
  # path="" the old code targeted "/<name>/..." at the filesystem root.
  outputDir = os.path.join(path, name, trainOrTest)

  PROPS, rc, flierprops = imageConfigurationsViolin(color)
  sea.set_theme(style="ticks", rc=rc)

  # One figure is reused for every series and closed once at the end.
  figF, axF = plt.subplots(figsize=(6,4))

  try:
    for count, timeseries in enumerate(dataset):

      # Axes.clear() resets tick colours, label sizes and the grid, so the
      # styling has to be applied again for every image, not only the first.
      axF.clear()
      _styleViolinAxes(axF, grid)

      # Width of one violin bin in samples. Clamped to 1 because a series
      # shorter than numberOfViolinplots would otherwise give a bin width of
      # 0 and a "divide by zero" inside pandas' rounding.
      binSeconds = max(1, timeseries.size // numberOfViolinplots)

      #Create Series
      datasetSeries = pd.Series(
        timeseries,
        index=pd.timedelta_range(
            start="00:00:00",
            periods=timeseries.size,
            freq="s")   # lowercase alias: the uppercase "S" is deprecated in recent pandas
        )

      indexViolinplot = (datasetSeries.index
                                   .round(str(binSeconds) + 's')
                                   .total_seconds()
                                   .astype(int))

      sea.violinplot(x=indexViolinplot, y=datasetSeries, ax=axF, palette=['0'],
                     flierprops=flierprops, inner=inner, **PROPS)

      # White outlines / lines; exact PathCollection artists are filled black.
      for item in axF.collections:
        item.set_edgecolor('1')
      for item in axF.lines:
        item.set_color('1')
      for item in axF.collections:
        if type(item) is matplotlib.collections.PathCollection:
          item.set_color('0')

      ############################################# Images
      #Create and save images in directory
      os.makedirs(outputDir, exist_ok=True)

      figF.canvas.draw()
      # buffer_rgba() replaces the deprecated canvas.tostring_rgb(); the alpha
      # channel is dropped to keep writing RGB images as before.
      img = Image.fromarray(np.asarray(figF.canvas.buffer_rgba())).convert('RGB')
      img.save(os.path.join(outputDir, name + str(count) + ".png"))

      # Free per-image objects eagerly to keep memory usage flat.
      del img
      del datasetSeries
      del indexViolinplot
      gc.collect()
  finally:
    # Close exactly this figure, even if rendering one of the series failed.
    plt.close(figF)


## Run code

### Configs

In [None]:
# Mount Google Drive at /content/drive; the script cell below reads the
# datasets from, and writes the images to, folders under that mount point.
from google.colab import drive
# Uncomment to force an unmount first (e.g. before re-mounting).
# drive.flush_and_unmount()
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Names of the datasets to process. The script cell uses each name both as the
# folder name and as the file prefix (<name>/<name>_TRAIN.ts, <name>/<name>_TEST.ts)
# when loading, and as the output sub-folder for the generated images.
DATA_SET_NAMES = [
"ACSF1",
"Adiac",
"ArrowHead",
"Beef",
"BeetleFly",
"BirdChicken",
"BME",
"Car",
"CBF",
"Chinatown",
"ChlorineConcentration",
"CinCECGTorso",
"Coffee",
"Computers",
"Crop",
"DiatomSizeReduction",
"DistalPhalanxOutlineAgeGroup",
"DistalPhalanxOutlineCorrect",
"DistalPhalanxTW",
"Earthquakes",
"ECG200",
"ECG5000",
"ECGFiveDays",
"ElectricDevices",
"EthanolLevel",
"FaceAll",
"FaceFour",
"FacesUCR",
"FiftyWords",
"Fish",
"FordA",
"FordB",
"FreezerRegularTrain",
"FreezerSmallTrain",
"GunPoint",
"GunPointAgeSpan",
"GunPointMaleVersusFemale",
"GunPointOldVersusYoung",
"Ham",
"Haptics",
"Herring",
"HouseTwenty",
"InlineSkate",
"InsectEPGRegularTrain",
"InsectEPGSmallTrain",
"ItalyPowerDemand",
"LargeKitchenAppliances",
"Lightning2",
"Lightning7",
"Mallat",
"Meat",
"MedicalImages",
"MiddlePhalanxOutlineAgeGroup",
"MiddlePhalanxOutlineCorrect",
"MiddlePhalanxTW",
"MixedShapesRegularTrain",
"MixedShapesSmallTrain",
"MoteStrain",
"OliveOil",
"OSULeaf",
"PhalangesOutlinesCorrect",
"Phoneme",
"PigAirwayPressure",
"PigArtPressure",
"PigCVP",
"Plane",
"ProximalPhalanxOutlineAgeGroup",
"ProximalPhalanxOutlineCorrect",
"ProximalPhalanxTW",
"RefrigerationDevices", #Optional
"Rock",
"ScreenType",
"SemgHandGenderCh2",
"SemgHandMovementCh2",
"SemgHandSubjectCh2",
"ShapeletSim",
"ShapesAll",
"SmallKitchenAppliances",
"SmoothSubspace",
"SonyAIBORobotSurface1",
"SonyAIBORobotSurface2",
"StarLightCurves",
"Strawberry",
"SwedishLeaf",
"Symbols",
"SyntheticControl",
"ToeSegmentation1",
"ToeSegmentation2",
"Trace",
"TwoLeadECG",
"TwoPatterns",
"UMD",
"UWaveGestureLibraryAll",
"Wafer",
"Wine",
"WordSynonyms",
"Worms",
"WormsTwoClass",
"Yoga"]

In [None]:
# Sanity check: how many datasets are listed, and the position of one dataset
# in the list. That position can be used to slice DATA_SET_NAMES
# (DATA_SET_NAMES[index:]) and resume a run that stopped at that dataset.
print(len(DATA_SET_NAMES))
DATA_SET_NAMES.index("Wafer")

99


93

### Script

In [None]:
numberOfViolinplots = 20

# Root folder that receives the images (one sub-folder per dataset and split).
path = "/content/drive/MyDrive/Tese/Violinplots/Imagens/" + str(numberOfViolinplots) + "_violinplots_box"
# path = "/content/drive/MyDrive/Tese/Boxplots/Imagens/test"
isTrain = False  # not read below: both the TRAIN and the TEST split are always rendered
pureBlackAndWhite = False
grid = False

# What to draw inside each violin
#inner = None
inner = "box"

# Folder holding the .ts files. Absolute, like `path`, so that loading does not
# depend on the notebook's current working directory.
tsRoot = "/content/drive/MyDrive/Tese/Univariate_ts/"

# If the run failed on a specific dataset, iterate over
# DATA_SET_NAMES[DATASET_INDEX:] instead to resume from that dataset.
for name in DATA_SET_NAMES:
  print("Using: ", name)

  # Handle one split at a time so that only a single split is held in memory.
  for split, splitIsTrain in (("TRAIN", True), ("TEST", False)):
    nested_x, labels = load_from_tsfile_to_dataframe(tsRoot + name + "/" + name + "_" + split + ".ts")
    series_x = from_nested_to_2d_np_array(nested_x)
    del nested_x, labels  # only the 2-D array is needed for plotting

    # datasetToViolinplot writes the images and returns nothing, so its
    # result is deliberately not assigned.
    datasetToViolinplot(series_x,
                        name = name,
                        isTrain = splitIsTrain,
                        path = path,
                        pureBlackAndWhite = pureBlackAndWhite,
                        numberOfViolinplots = numberOfViolinplots,
                        grid = grid,
                        inner = inner)

    ################### RESET TO IMPROVE RAM ################################
    del series_x
    gc.collect()
    #########################################################################

Using:  Wafer
Using:  Wine
Using:  WordSynonyms
Using:  Worms
Using:  WormsTwoClass
Using:  Yoga


In order to make Google Colab exceed its time limits (the cell below blocks forever):

In [None]:
import time

# Block forever, as before, but sleep between iterations instead of
# busy-waiting so the cell does not keep a CPU core at 100%.
while True:
  time.sleep(60)

ANALYZE: SmoothSubspace with 20 boxplots produced "divide by zero" warnings:


```
/usr/local/lib/python3.7/dist-packages/pandas/core/arrays/datetimelike.py:1698: RuntimeWarning: divide by zero encountered in divmod
  result_i8 = round_nsint64(values, mode, nanos)
```



ANALYZE: Chinatown with 50 boxplots produced "divide by zero" warnings:



```
/usr/local/lib/python3.7/dist-packages/pandas/core/arrays/datetimelike.py:1698: RuntimeWarning: divide by zero encountered in divmod
  result_i8 = round_nsint64(values, mode, nanos)
```


