# Script to create boxplot images


## Imports

In [None]:
from IPython.display import clear_output
!pip install sktime
clear_output()

In [None]:
import pandas as pd
import numpy as np
import seaborn as sea
import matplotlib.pyplot as plt
import os
import gc

from PIL import Image
from sktime.datatypes._panel._convert import from_nested_to_2d_np_array
from sktime.datasets import load_from_tsfile_to_dataframe


## Functions

In [None]:
def imageConfigurations( color ):
  """Build the styling dictionaries used to draw the boxplots.

  Parameters
  ----------
  color : truthy/falsy
      When truthy, a multi-colour palette is used (red box edges, green
      median, blue whiskers, yellow caps, red outlier markers). Otherwise
      every element is drawn in white.

  Returns
  -------
  tuple
      `(PROPS, rc, flierprops)`:
      - PROPS: dict with the 'boxprops', 'medianprops', 'whiskerprops' and
        'capprops' entries,
      - rc: dict with black 'axes.facecolor' and 'figure.facecolor',
      - flierprops: dict describing the outlier markers.
  """
  # Pick the palette first; the structure of the result is the same in both modes.
  if color:
    edgeColor, medianColor, whiskerColor, capColor = 'red', 'green', 'blue', 'yellow'
    outlierColor = 'r'
  else:
    edgeColor = medianColor = whiskerColor = capColor = 'white'
    outlierColor = 'w'

  # Box interior is always black, only the outlines change with the palette.
  boxStyle = {
      'boxprops': {'facecolor': 'black', 'edgecolor': edgeColor},
      'medianprops': {'color': medianColor},
      'whiskerprops': {'color': whiskerColor},
      'capprops': {'color': capColor},
  }

  # Black background in both modes.
  background = {
      'axes.facecolor': 'black',
      'figure.facecolor': 'black',
  }

  # Outliers are small filled circles without connecting lines.
  outlierStyle = {
      'marker': 'o',
      'markerfacecolor': outlierColor,
      'markersize': 1,
      'linestyle': 'none',
      'markeredgecolor': outlierColor,
  }

  return boxStyle, background, outlierStyle

In [None]:
def datasetToBoxplot ( 
    dataset,
    name = "",
    isTrain = False,
    path = "",
    color = False, 
    pureBlackAndWhite = False, 
    numberOfBoxplots = 10,
    grid = False
  ): 
  """Save one boxplot image (PNG) per time series in `dataset`.

  Every series is indexed with one step per sample, the index is rounded to
  a multiple of `size // numberOfBoxplots`, and one box is drawn per rounded
  value (so roughly `numberOfBoxplots` boxes per image). A new figure is
  created and closed for every series. Images are written to
  `<path>/<name>/<TRAIN|TEST>/<name><i>.png`, where `i` is the position of
  the series in `dataset`.

  Parameters
  ----------
  dataset : iterable of 1-D array-likes exposing `.size`
      The series to plot (e.g. the rows of a 2-D numpy array).
  name : str
      Dataset name; used as folder name and file-name prefix.
  isTrain : bool
      Selects the "TRAIN" (True) or "TEST" (False) sub-folder.
  path : str
      Root output folder.
  color : bool
      Forwarded to `imageConfigurations` to choose the palette.
  pureBlackAndWhite : bool
      Currently unused; kept so existing callers keep working.
  numberOfBoxplots : int
      Target number of boxes per image; must be >= 1.
  grid : bool
      Whether to draw the axes grid.

  Raises
  ------
  ValueError
      If `numberOfBoxplots` is smaller than 1.
  """
  if numberOfBoxplots < 1:
    raise ValueError("numberOfBoxplots must be >= 1, got " + str(numberOfBoxplots))

  trainOrTest = "TRAIN" if isTrain else "TEST"

  # Create the output folder once instead of checking it for every image.
  outputDir = path + "/" + name + "/" + trainOrTest
  os.makedirs(outputDir, exist_ok=True)

  PROPS, rc, flierprops = imageConfigurations(color)
  sea.set_theme(style="ticks", rc=rc)

  for count, timeseries in enumerate(dataset):

    # One index step ("second") per sample.
    datasetSeries = pd.Series(
      timeseries, 
      index=pd.timedelta_range(
          start="00:00:00", 
          periods=timeseries.size, 
          freq="s")
      )

    # Series shorter than numberOfBoxplots would give a bin width of 0 and a
    # divide-by-zero inside the rounding; fall back to one sample per box.
    binWidth = max(1, timeseries.size // numberOfBoxplots)

    indexBoxplot = (datasetSeries.index
                                 .round(str(binWidth) + 's')
                                 .total_seconds()
                                 .astype(int))

    figF, axF = plt.subplots(figsize=(6,4))
    try:
      # White ticks and spines on the black background set through `rc`.
      axF.grid(grid)
      axF.tick_params(axis='x', colors='1', labelsize=8)
      axF.tick_params(axis='y', colors='1', labelsize=8)
      for side in ('left', 'right', 'bottom', 'top'):
        axF.spines[side].set_color('1')

      sea.boxplot(x=indexBoxplot, y=datasetSeries, ax=axF, linewidth=1,
                  flierprops=flierprops, **PROPS)

      # Render, then copy the pixel buffer into a PIL image and save it.
      # buffer_rgba() replaces the deprecated tostring_rgb(); alpha is dropped.
      figF.canvas.draw()
      rgba = np.asarray(figF.canvas.buffer_rgba())
      img = Image.fromarray(np.ascontiguousarray(rgba[:, :, :3]))
      img.save(outputDir + "/" + name + str(count) + ".png")
      del(img, rgba)
    finally:
      # Close this specific figure even if plotting or saving failed.
      plt.close(figF)

    # Drop references eagerly to keep memory flat over large datasets.
    del(datasetSeries)
    del(indexBoxplot)
    del(figF, axF)
    gc.collect()



In [None]:
def datasetToBoxplotImproved ( 
    dataset,
    name = "",
    isTrain = False,
    path = "",
    color = False, 
    pureBlackAndWhite = False, 
    numberOfBoxplots = 10,
    grid = False,
    showFliers = True
  ): 
  """Save one boxplot image (PNG) per time series, reusing a single figure.

  Same output layout as `datasetToBoxplot`
  (`<path>/<name>/<TRAIN|TEST>/<name><i>.png`), but the figure and axes are
  created once and cleared between series instead of being rebuilt for
  every image.

  Parameters
  ----------
  dataset : iterable of 1-D array-likes exposing `.size`
      The series to plot (e.g. the rows of a 2-D numpy array).
  name : str
      Dataset name; used as folder name and file-name prefix.
  isTrain : bool
      Selects the "TRAIN" (True) or "TEST" (False) sub-folder.
  path : str
      Root output folder.
  color : bool
      Forwarded to `imageConfigurations` to choose the palette.
  pureBlackAndWhite : bool
      Currently unused; kept so existing callers keep working.
  numberOfBoxplots : int
      Target number of boxes per image; must be >= 1.
  grid : bool
      Whether to draw the axes grid.
  showFliers : bool
      Whether outlier markers are drawn.

  Raises
  ------
  ValueError
      If `numberOfBoxplots` is smaller than 1.
  """
  if numberOfBoxplots < 1:
    raise ValueError("numberOfBoxplots must be >= 1, got " + str(numberOfBoxplots))

  trainOrTest = "TRAIN" if isTrain else "TEST"

  # Create the output folder once instead of checking it for every image.
  outputDir = path + "/" + name + "/" + trainOrTest
  os.makedirs(outputDir, exist_ok=True)

  PROPS, rc, flierprops = imageConfigurations(color)
  sea.set_theme(style="ticks", rc=rc)

  figF, axF = plt.subplots(figsize=(6,4))
  try:
    for count, timeseries in enumerate(dataset):

      # Clearing the axes also resets tick and grid settings, so the styling
      # has to be re-applied for every image, not only before the first one.
      axF.clear()
      axF.grid(grid)
      axF.tick_params(axis='x', colors='1', labelsize=8)
      axF.tick_params(axis='y', colors='1', labelsize=8)
      for side in ('left', 'right', 'bottom', 'top'):
        axF.spines[side].set_color('1')

      # One index step ("second") per sample.
      datasetSeries = pd.Series(
        timeseries, 
        index=pd.timedelta_range(
            start="00:00:00", 
            periods=timeseries.size, 
            freq="s")
        )

      # Series shorter than numberOfBoxplots would give a bin width of 0 and a
      # divide-by-zero inside the rounding; fall back to one sample per box.
      binWidth = max(1, timeseries.size // numberOfBoxplots)

      indexBoxplot = (datasetSeries.index
                                   .round(str(binWidth) + 's')
                                   .total_seconds()
                                   .astype(int))

      sea.boxplot(x=indexBoxplot, y=datasetSeries, ax=axF, linewidth=1, 
                  showfliers=showFliers, flierprops=flierprops, **PROPS)

      # Render, then copy the pixel buffer into a PIL image and save it.
      # buffer_rgba() replaces the deprecated tostring_rgb(); alpha is dropped.
      figF.canvas.draw()
      rgba = np.asarray(figF.canvas.buffer_rgba())
      img = Image.fromarray(np.ascontiguousarray(rgba[:, :, :3]))
      img.save(outputDir + "/" + name + str(count) + ".png")

      # Drop references eagerly to keep memory flat over large datasets.
      del(img, rgba)
      del(datasetSeries)
      del(indexBoxplot)
      gc.collect()
  finally:
    # The figure is shared by all images: close it once, when done or on error.
    plt.close(figF)
    gc.collect()


## Run code

### Configs

In [None]:
# Mount Google Drive in the Colab runtime; the output path used later in this
# notebook lives under this mount point.
from google.colab import drive
# drive.flush_and_unmount()
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Names of the datasets to process. Each name is used by the script below both
# as the folder name and as the file prefix of the `<name>_TRAIN.ts` /
# `<name>_TEST.ts` files, and as the output sub-folder for the images.
DATA_SET_NAMES = [
"ACSF1",
"Adiac",
"ArrowHead",
"Beef",
"BeetleFly",
"BirdChicken",
"BME",
"Car",
"CBF",
"Chinatown",
"ChlorineConcentration",
"CinCECGTorso",
"Coffee",
"Computers",
"Crop",
"DiatomSizeReduction",
"DistalPhalanxOutlineAgeGroup",
"DistalPhalanxOutlineCorrect",
"DistalPhalanxTW",
"Earthquakes",
"ECG200",
"ECG5000",
"ECGFiveDays",
"ElectricDevices",
"EthanolLevel",
"FaceAll",
"FaceFour",
"FacesUCR",
"FiftyWords",
"Fish",
"FordA",
"FordB",
"FreezerRegularTrain",
"FreezerSmallTrain",
"GunPoint",
"GunPointAgeSpan",
"GunPointMaleVersusFemale",
"GunPointOldVersusYoung",
"Ham",
"Haptics",
"Herring",
"HouseTwenty",
"InlineSkate",
"InsectEPGRegularTrain",
"InsectEPGSmallTrain",
"ItalyPowerDemand",
"LargeKitchenAppliances",
"Lightning2",
"Lightning7",
"Mallat",
"Meat",
"MedicalImages",
"MiddlePhalanxOutlineAgeGroup",
"MiddlePhalanxOutlineCorrect",
"MiddlePhalanxTW",
"MixedShapesRegularTrain",
"MixedShapesSmallTrain",
"MoteStrain",
"OliveOil",
"OSULeaf",
"PhalangesOutlinesCorrect",
"Phoneme",
"PigAirwayPressure",
"PigArtPressure",
"PigCVP",
"Plane",
"ProximalPhalanxOutlineAgeGroup",
"ProximalPhalanxOutlineCorrect",
"ProximalPhalanxTW",
"RefrigerationDevices", #Optional
"Rock",
"ScreenType",
"SemgHandGenderCh2",
"SemgHandMovementCh2",
"SemgHandSubjectCh2",
"ShapeletSim",
"ShapesAll",
"SmallKitchenAppliances",
"SmoothSubspace",
"SonyAIBORobotSurface1",
"SonyAIBORobotSurface2",
"StarLightCurves",
"Strawberry",
"SwedishLeaf",
"Symbols",
"SyntheticControl",
"ToeSegmentation1",
"ToeSegmentation2",
"Trace",
"TwoLeadECG",
"TwoPatterns",
"UMD",
"UWaveGestureLibraryAll",
"Wafer",
"Wine",
"WordSynonyms",
"Worms",
"WormsTwoClass",
"Yoga"]

In [None]:
# Sanity check: number of datasets in the list.
print(len(DATA_SET_NAMES))
# Position of one dataset in the list — presumably looked up so an interrupted
# run can be resumed from that dataset by slicing the list (confirm).
DATA_SET_NAMES.index("MixedShapesSmallTrain")

99


56

### Script

In [None]:
# Alternative output folders used in earlier runs:
# path = "/content/drive/MyDrive/Tese/Boxplots/Imagens/5_boxplots_no_fliers" 
# path = "/content/drive/MyDrive/Tese/Boxplots/Imagens/test"

numberOfBoxplots = 12

# NOTE(review): the folder name says "no_fliers" but showFliers is True below —
# confirm which of the two is intended before regenerating images.
path = "/content/drive/MyDrive/Tese/Boxplots/Imagens/" + str(numberOfBoxplots) + "_boxplots_no_fliers"

isTrain = False            # not read by the loop below; each split passes its own flag
pureBlackAndWhite = False  # not read by the loop below
grid = False
showFliers = True

# If the program failed on a specific dataset, iterate over
# DATA_SET_NAMES[DATASET_INDEX:] instead to resume from that dataset.
for name in DATA_SET_NAMES:
  dataSet = name
  print("Using: ", dataSet)

  # Handle one split at a time so TRAIN and TEST are never both held in RAM.
  for splitName, splitIsTrain in (("TRAIN", True), ("TEST", False)):
    split_x, split_y = load_from_tsfile_to_dataframe(
        "drive/MyDrive/Tese/Univariate_ts/" + dataSet + "/" + dataSet + "_" + splitName + ".ts")
    split_x = from_nested_to_2d_np_array(split_x)

    # The function only writes images and returns nothing, so no result is kept.
    datasetToBoxplotImproved(split_x, 
                             name = dataSet,
                             isTrain = splitIsTrain, 
                             path = path,
                             numberOfBoxplots = numberOfBoxplots, 
                             grid = grid,
                             showFliers = showFliers)

    ################### RESET TO IMPROVE RAM ################################
    del(split_x, split_y)
    gc.collect()
    #########################################################################

  del(dataSet)

Using:  MixedShapesSmallTrain
Using:  MoteStrain
Using:  OliveOil
Using:  OSULeaf
Using:  PhalangesOutlinesCorrect
Using:  Phoneme
Using:  PigAirwayPressure
Using:  PigArtPressure
Using:  PigCVP
Using:  Plane
Using:  ProximalPhalanxOutlineAgeGroup
Using:  ProximalPhalanxOutlineCorrect
Using:  ProximalPhalanxTW
Using:  RefrigerationDevices
Using:  Rock
Using:  ScreenType
Using:  SemgHandGenderCh2
Using:  SemgHandMovementCh2
Using:  SemgHandSubjectCh2
Using:  ShapeletSim
Using:  ShapesAll
Using:  SmallKitchenAppliances
Using:  SmoothSubspace
Using:  SonyAIBORobotSurface1
Using:  SonyAIBORobotSurface2
Using:  StarLightCurves
Using:  Strawberry
Using:  SwedishLeaf
Using:  Symbols
Using:  SyntheticControl
Using:  ToeSegmentation1
Using:  ToeSegmentation2
Using:  Trace
Using:  TwoLeadECG
Using:  TwoPatterns
Using:  UMD
Using:  UWaveGestureLibraryAll
Using:  Wafer
Using:  Wine
Using:  WordSynonyms
Using:  Worms
Using:  WormsTwoClass
Using:  Yoga


Alternativas para guardar imagens:
- plt.imsave(img, name)
- guardar em jpeg? queria evitar
- Usar PIL, dizem que é 100x mais rapido: Try making a PIL image object, for me it's more than 100 times faster than matplotlib:


```
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

data = np.random.random((100, 100))
cm = plt.get_cmap('viridis')
img = Image.fromarray((cm(data)[:, :, :3] * 255).astype(np.uint8))
img.save('image.png')
```
If you just want greyscale, you can skip the get_cmap business — just scale your array to the range 0 to 255.

The annotations would have to be added in PIL.

One important difference from using matplotlib is that it's pixel-for-pixel. So if you want to apply some scaling, you'll have to interpolate first. You could use scipy.ndimage.zoom for that.



Keep-alive cell, used to keep the Google Colab session from hitting its idle time limit:


In [None]:
# Busy-wait forever; the cell only stops when it is interrupted manually.
# Presumably meant to keep the Colab session alive — confirm.
while True:pass

KeyboardInterrupt: ignored

- VER SE AS IMAGENS NO TEST estão fixes apesar de mais rapidas. Procurar uma melhor maneira de dar save das imagens.

- VER se a ordem das imagens ao dar load, vem na ordem correta

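ANALYZE: SmoothSubspace with 20 boxplots gave some "divide by zero" warnings. Likely cause: the series has fewer points than the requested number of boxplots, so `timeseries.size // numberOfBoxplots` is 0 and the index is rounded to a step of 0 seconds.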


```
/usr/local/lib/python3.7/dist-packages/pandas/core/arrays/datetimelike.py:1698: RuntimeWarning: divide by zero encountered in divmod
  result_i8 = round_nsint64(values, mode, nanos)
```



ANALYZE: Chinatown with 50 boxplots gave the same "divide by zero" warnings (likely the same cause: fewer points per series than requested boxplots).



```
/usr/local/lib/python3.7/dist-packages/pandas/core/arrays/datetimelike.py:1698: RuntimeWarning: divide by zero encountered in divmod
  result_i8 = round_nsint64(values, mode, nanos)
```


