# Image Size Dataframe

In [None]:
# Mount Google Drive at /content/drive; every dataset and output path used in
# this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install rasterio

In [None]:
import numpy as np
import pandas as pd
from pandas import option_context
from glob import glob
import rasterio as rio

In [None]:
from termcolor import colored
import matplotlib.pyplot as plt
from IPython.display import HTML
from IPython.display import display

In [None]:
def make_image_size_dataframe(folder_name, file_extension):
  """Build a table of image file names and their pixel dimensions.

  Args:
    folder_name: Directory prefix. It is concatenated directly with
      `file_extension`, so it must end with a path separator.
    file_extension: Glob pattern for the file names, e.g. '*.tif'.

  Returns:
    pandas.DataFrame with one row per matched file and two columns:
    'file' (the path component after the last '/') and 'size'
    (a (height, width) tuple). Rows are ordered by path length and then
    alphabetically, so e.g. '2.tif' comes before '10.tif'. The frame is
    empty when nothing matches.
  """
  # One keyed sort replaces "sort alphabetically, then stable-sort by length".
  image_paths = sorted(
      glob(folder_name + file_extension),
      key=lambda path: (len(path), path),
  )

  image_sizes = []
  for image_path in image_paths:
    with rio.open(image_path, 'r') as f:
      # Read the dimensions from the dataset header instead of decoding a
      # whole band just to measure it.
      image_sizes.append((f.height, f.width))

  image_names = [image_path.split('/')[-1] for image_path in image_paths]

  return pd.DataFrame({'file': image_names, 'size': image_sizes})

In [None]:
# Scan the FAIR1M 2.0 train and validation image folders and record the size
# of every .tif file found in each.
train_image_size_df = make_image_size_dataframe(
  folder_name='/content/drive/MyDrive/Aiffelthon/mmrotate/data/fair1m2.0/images_train/',
  file_extension='*.tif',
)
validation_image_size_df = make_image_size_dataframe(
  folder_name='/content/drive/MyDrive/Aiffelthon/mmrotate/data/fair1m2.0/images_val/',
  file_extension='*.tif',
)

In [None]:
# Persist the per-image size tables to Drive; index=False keeps the row index
# out of the CSV files.
train_image_size_df.to_csv(
  path_or_buf='/content/drive/MyDrive/Aiffelthon/eda/image_size/df/train_image_size_df.csv',
  index=False,
)
validation_image_size_df.to_csv(
  path_or_buf='/content/drive/MyDrive/Aiffelthon/eda/image_size/df/validation_image_size_df.csv',
  index=False,
)

In [None]:
# Re-load the saved per-image size tables from Drive and preview each one.
# sorted() makes the display order deterministic; glob() alone returns matches
# in whatever order the filesystem lists them.
csv_files = sorted(glob('/content/drive/MyDrive/Aiffelthon/eda/image_size/df/' +'*image_size_df.csv'))

for csv_path in csv_files:
  df = pd.read_csv(csv_path)
  # Heading = the part of the file name before the first underscore.
  file_name = csv_path.split("/")[-1].split("_")[0]
  print(colored(file_name, attrs=['bold']))
  print()
  # Scope both display options to this preview so the notebook-wide pandas
  # settings are not changed as a side effect.
  with option_context('display.max_rows', 10, 'display.max_colwidth', None):
    display(df)
  print("\n" * 2)

[1mtrain[0m



Unnamed: 0,file,size
0,0.tif,"(1500, 1500)"
1,1.tif,"(1000, 1000)"
2,2.tif,"(1000, 1000)"
3,3.tif,"(1000, 1000)"
4,4.tif,"(1000, 1000)"
...,...,...
16483,16483.tif,"(1000, 1000)"
16484,16484.tif,"(1000, 1000)"
16485,16485.tif,"(600, 800)"
16486,16486.tif,"(1000, 1000)"





[1mvalidation[0m



Unnamed: 0,file,size
0,0.tif,"(1000, 1000)"
1,1.tif,"(1000, 1000)"
2,2.tif,"(1000, 1000)"
3,3.tif,"(1000, 1000)"
4,4.tif,"(1000, 1000)"
...,...,...
8282,8282.tif,"(600, 800)"
8283,8283.tif,"(800, 600)"
8284,8284.tif,"(600, 800)"
8285,8285.tif,"(1000, 1000)"







In [None]:
def _count_image_sizes(image_size_df):
  """Return a two-column ('size', 'count') frame of how often each size occurs."""
  count_df = image_size_df['size'].value_counts().to_frame().reset_index()
  count_df.columns = ['size', 'count']
  return count_df


# Frequency of every distinct image size, most common first (value_counts order).
train_image_size_count_df = _count_image_sizes(train_image_size_df)
validation_image_size_count_df = _count_image_sizes(validation_image_size_df)

In [None]:
def _size_sort_key(size):
  """Return the numbers of a size as a tuple of ints.

  Accepts either a tuple such as (600, 800) or its text form '(600, 800)',
  which is what the column holds after a round trip through CSV.
  """
  if isinstance(size, str):
    return tuple(int(part) for part in size.strip('()').split(','))
  return tuple(int(part) for part in size)


def _index_sorted_by_size(count_df):
  """List count_df's index labels ordered by ascending numeric size."""
  labelled_sizes = zip(count_df.index, count_df['size'])
  ordered = sorted(labelled_sizes, key=lambda pair: _size_sort_key(pair[1]))
  return [label for label, _ in ordered]


# Order the count tables by image size instead of by frequency. Every number
# in the size takes part in the comparison, so sizes sharing a first dimension
# (e.g. 600x600 and 600x800) always come out in the same order, and the key
# works whether 'size' holds tuples or their string form.
train_reorderlist = _index_sorted_by_size(train_image_size_count_df)
train_image_size_count_sort_df = train_image_size_count_df.loc[train_reorderlist]
validation_reorderlist = _index_sorted_by_size(validation_image_size_count_df)
validation_image_size_count_sort_df = validation_image_size_count_df.loc[validation_reorderlist]

In [None]:
# Save the frequency-ordered size counts to Drive, without the row index.
train_image_size_count_df.to_csv(
  path_or_buf='/content/drive/MyDrive/Aiffelthon/eda/image_size/df/train_image_size_count_df.csv',
  index=False,
)
validation_image_size_count_df.to_csv(
  path_or_buf='/content/drive/MyDrive/Aiffelthon/eda/image_size/df/validation_image_size_count_df.csv',
  index=False,
)

In [None]:
# Save the size-ordered counts to Drive, without the row index.
train_image_size_count_sort_df.to_csv(
  path_or_buf='/content/drive/MyDrive/Aiffelthon/eda/image_size/df/train_image_size_count_sort_df.csv',
  index=False,
)
validation_image_size_count_sort_df.to_csv(
  path_or_buf='/content/drive/MyDrive/Aiffelthon/eda/image_size/df/validation_image_size_count_sort_df.csv',
  index=False,
)

In [None]:
# List the distinct image sizes of each split in the count table's row order,
# under a bold heading; sep/end reproduce the blank lines between the parts.
print(
  colored("Train Data", attrs=['bold']),
  train_image_size_count_df['size'].unique(),
  sep="\n\n",
  end="\n\n\n",
)
print(
  colored("Validation Data", attrs=['bold']),
  validation_image_size_count_df['size'].unique(),
  sep="\n\n",
)

[1mTrain Data[0m

['(1000, 1000)' '(600, 800)' '(800, 600)' '(1500, 1500)' '(800, 800)'
 '(900, 900)' '(2000, 2000)' '(700, 700)' '(600, 600)' '(4000, 4000)'
 '(3000, 3000)' '(7000, 7000)' '(5000, 5000)' '(6000, 6000)'
 '(6921, 7000)' '(4426, 5934)' '(3106, 3709)' '(5513, 4956)'
 '(5712, 5877)' '(4137, 4103)' '(3842, 3744)' '(3970, 4141)'
 '(4946, 4701)' '(4321, 4288)' '(4797, 4443)' '(5237, 5748)'
 '(3712, 3600)' '(4387, 4420)' '(5786, 5891)' '(5387, 6393)'
 '(5833, 6025)' '(3070, 2969)' '(3334, 3601)' '(7000, 6909)'
 '(3762, 3325)' '(4405, 5960)' '(4155, 4409)']


[1mValidation Data[0m

['(1000, 1000)' '(600, 800)' '(800, 600)' '(600, 600)' '(800, 800)'
 '(6000, 6000)' '(4000, 4000)' '(7000, 7000)' '(4262, 7000)'
 '(8000, 8000)' '(3355, 4000)' '(5787, 5421)' '(5847, 3182)'
 '(6583, 3762)' '(5534, 2949)' '(4278, 5332)' '(6567, 6309)'
 '(6971, 6716)' '(5836, 2676)' '(5484, 5383)' '(5598, 3034)'
 '(3422, 4000)' '(9000, 9000)' '(6421, 6215)' '(6614, 6466)'
 '(9472, 10000)' '(6075, 30

In [None]:
# List the distinct image sizes of each split in the size-ordered table's row
# order, under a bold heading; sep/end reproduce the blank lines between parts.
print(
  colored("Train Data", attrs=['bold']),
  train_image_size_count_sort_df['size'].unique(),
  sep="\n\n",
  end="\n\n\n",
)
print(
  colored("Validation Data", attrs=['bold']),
  validation_image_size_count_sort_df['size'].unique(),
  sep="\n\n",
)

[1mTrain Data[0m

['(600, 600)' '(600, 800)' '(700, 700)' '(800, 600)' '(800, 800)'
 '(900, 900)' '(1000, 1000)' '(1500, 1500)' '(2000, 2000)' '(3000, 3000)'
 '(3070, 2969)' '(3106, 3709)' '(3334, 3601)' '(3712, 3600)'
 '(3762, 3325)' '(3842, 3744)' '(3970, 4141)' '(4000, 4000)'
 '(4137, 4103)' '(4155, 4409)' '(4321, 4288)' '(4387, 4420)'
 '(4405, 5960)' '(4426, 5934)' '(4797, 4443)' '(4946, 4701)'
 '(5000, 5000)' '(5237, 5748)' '(5387, 6393)' '(5513, 4956)'
 '(5712, 5877)' '(5786, 5891)' '(5833, 6025)' '(6000, 6000)'
 '(6921, 7000)' '(7000, 6909)' '(7000, 7000)']


[1mValidation Data[0m

['(600, 800)' '(600, 600)' '(800, 600)' '(800, 800)' '(1000, 1000)'
 '(3355, 4000)' '(3422, 4000)' '(4000, 4000)' '(4262, 7000)'
 '(4278, 5332)' '(5484, 5383)' '(5534, 2949)' '(5598, 3034)'
 '(5787, 5421)' '(5836, 2676)' '(5847, 3182)' '(6000, 6000)'
 '(6075, 3054)' '(6421, 6215)' '(6567, 6309)' '(6583, 3762)'
 '(6614, 6466)' '(6971, 6716)' '(7000, 7000)' '(8000, 8000)'
 '(9000, 9000)' '(9472, 100

In [None]:
# Re-load the saved size-count tables from Drive and preview each one.
# sorted() makes the display order deterministic; glob() alone returns matches
# in whatever order the filesystem lists them.
csv_files = sorted(glob('/content/drive/MyDrive/Aiffelthon/eda/image_size/df/' +'*count_df.csv'))

for csv_path in csv_files:
  df = pd.read_csv(csv_path)
  # Heading = the part of the file name before the first underscore.
  file_name = csv_path.split("/")[-1].split("_")[0]
  print(colored(file_name, attrs=['bold']))
  print()
  # Scope both display options to this preview so the notebook-wide pandas
  # settings are not changed as a side effect.
  with option_context('display.max_rows', 10, 'display.max_colwidth', None):
    display(df)
  print("\n" * 2)

[1mtrain[0m



Unnamed: 0,size,count
0,"(1000, 1000)",6993
1,"(600, 800)",4097
2,"(800, 600)",3965
3,"(1500, 1500)",474
4,"(800, 800)",221
...,...,...
32,"(3334, 3601)",1
33,"(7000, 6909)",1
34,"(3762, 3325)",1
35,"(4405, 5960)",1





[1mvalidation[0m



Unnamed: 0,size,count
0,"(1000, 1000)",3086
1,"(600, 800)",2567
2,"(800, 600)",2362
3,"(600, 600)",132
4,"(800, 800)",61
...,...,...
22,"(9000, 9000)",1
23,"(6421, 6215)",1
24,"(6614, 6466)",1
25,"(9472, 10000)",1







In [None]:
# Re-load the saved size-ordered count tables from Drive and preview each one.
# sorted() makes the display order deterministic; glob() alone returns matches
# in whatever order the filesystem lists them.
csv_files = sorted(glob('/content/drive/MyDrive/Aiffelthon/eda/image_size/df/' +'*count_sort_df.csv'))

for csv_path in csv_files:
  df = pd.read_csv(csv_path)
  # Heading = the part of the file name before the first underscore.
  file_name = csv_path.split("/")[-1].split("_")[0]
  print(colored(file_name, attrs=['bold']))
  print()
  # Scope both display options to this preview so the notebook-wide pandas
  # settings are not changed as a side effect.
  with option_context('display.max_rows', 10, 'display.max_colwidth', None):
    display(df)
  print("\n" * 2)

[1mtrain[0m



Unnamed: 0,size,count
0,"(600, 600)",66
1,"(600, 800)",4097
2,"(700, 700)",172
3,"(800, 600)",3965
4,"(800, 800)",221
...,...,...
32,"(5833, 6025)",1
33,"(6000, 6000)",4
34,"(6921, 7000)",2
35,"(7000, 6909)",1





[1mvalidation[0m



Unnamed: 0,size,count
0,"(600, 800)",2567
1,"(600, 600)",132
2,"(800, 600)",2362
3,"(800, 800)",61
4,"(1000, 1000)",3086
...,...,...
22,"(6971, 6716)",1
23,"(7000, 7000)",17
24,"(8000, 8000)",2
25,"(9000, 9000)",1





