# Tabular data examples

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

## Generate data: `from one to many`

In [7]:
N = 5_000  # number of rows to generate for the random table

In [8]:
# N consecutive calendar days starting on 1 January 2005; the last ten are
# displayed as a quick sanity check of where the range ends.
dates_range = pd.date_range(periods=N, start='1/1/2005')
dates_range[-10:]

DatetimeIndex(['2018-08-31', '2018-09-01', '2018-09-02', '2018-09-03',
               '2018-09-04', '2018-09-05', '2018-09-06', '2018-09-07',
               '2018-09-08', '2018-09-09'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Build N rows of filler data: a date drawn (with replacement) from `dates_range`,
# a uniform random float, a status label and a boolean flag.
date = pd.to_datetime(np.random.choice(dates_range, size=N))
region = None  # placeholder; not used when building the table below
var1 = np.random.random(size=N)
var2 = np.random.choice(['in progress', 'completed'], size=N)
var3 = np.random.choice([True, False], size=N)
rand_table = pd.DataFrame(
    {
        'Date': date,
        'Var1': var1,
        'Var2': var2,
        'Var3': var3,
    }
)

In [10]:
# Company master data. The lists below are parallel: position i in every list
# describes the same company.
name = [
    'Beiersdorf AG',
    'Beiersdorf Customer Supply GmbH',
    'Beiersdorf Manufacturing Berlin GmbH',
    'Beiersdorf Manufacturing Hamburg GmbH',
    'Beiersdorf Manufacturing Waldheim GmbH',
    'Beiersdorf Shared Services GmbH',
    'La Prairie Group Deutschland GmbH',
    'WINGMAN-STUDIOS GmbH',
]
address = [
    'Unnastrasse 48',
    'Unnastrasse 48',
    'Franklinstrasse 1',
    'Troplowitzstrasse 10',
    'Am Eichberg',
    'Quickbornstrasse 24',
    'Lange Straße 65',
    'Troplowitzstrasse 10',
]
# Zip codes are strings so that the leading zero of '04736' is kept.
zip_code = ['20253', '20253', '10587', '22529', '04736', '20253', '76530', '22529']
# Every entry needs its own comma: two adjacent string literals are silently
# concatenated by Python, which would shorten this list by one element.
city = [
    'Hamburg',
    'Hamburg',
    'Berlin',
    'Hamburg',
    'Waldheim',
    'Hamburg',
    'Baden-Baden',
    'Hamburg',
]
country = ['Germany'] * len(name)

# zip() stops at the shortest input, so a list that is one entry short would
# drop a company without any error. Fail loudly instead.
assert len(name) == len(address) == len(zip_code) == len(city) == len(country), \
    'company attribute lists must all have the same length'

zipped = list(zip(name, address, zip_code, city, country))
# Pick one company (by position) at random for each of the N rows.
idx = np.random.choice(len(zipped), N)
headquaters = pd.DataFrame(
    [zipped[i] for i in idx],
    columns=['Name', 'Address', 'Zip-code', 'City', 'Country'],
)

In [11]:
# Put the random columns and the company columns side by side; rows are matched
# on the index of the two frames.
data = pd.concat(
    [rand_table, headquaters],
    axis=1,
    sort=True,
)

In [12]:
# Show ten randomly chosen rows of the combined table.
data.sample(n=10)

Unnamed: 0,Date,Var1,Var2,Var3,Name,Address,Zip-code,City,Country
2446,2005-01-02,0.053435,in progress,False,Beiersdorf Manufacturing Hamburg GmbH,Troplowitzstrasse 10,22529,Hamburg,Germany
4413,2018-06-15,0.67747,in progress,False,Beiersdorf Manufacturing Hamburg GmbH,Troplowitzstrasse 10,22529,Hamburg,Germany
3950,2006-11-10,0.14037,completed,False,Beiersdorf AG,Unnastrasse 48,20253,Hamburg,Germany
3752,2015-12-22,0.040187,in progress,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
897,2013-07-31,0.192083,in progress,True,Beiersdorf Manufacturing Hamburg GmbH,Troplowitzstrasse 10,22529,Hamburg,Germany
1909,2005-03-05,0.54664,in progress,True,Beiersdorf Manufacturing Waldheim GmbH,Am Eichberg,4736,Waldheim,Germany
4785,2005-02-25,0.779751,in progress,True,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
1187,2009-09-29,0.197042,completed,False,Beiersdorf Manufacturing Berlin GmbH,Franklinstrasse 1,10587,Berlin,Germany
1334,2014-01-09,0.72368,completed,False,Beiersdorf Customer Supply GmbH,Unnastrasse 48,20253,Hamburg,Germany
380,2016-04-13,0.485513,in progress,False,Beiersdorf Manufacturing Hamburg GmbH,Troplowitzstrasse 10,22529,Hamburg,Germany


In [13]:
# Write the table twice without the row index: once semicolon-separated
# (the "german" file) and once comma-separated.
for csv_path, separator in [
    ('../data/fake_beiersdorf_data_german.csv', ';'),
    ('../data/fake_beiersdorf_data.csv', ','),
]:
    data.to_csv(csv_path, sep=separator, index=False)

***

## Generate data: `from xls to xlsx`

__xls__

In [32]:
number_of_files = 50

# to_excel does not create missing directories, so make sure the target folder exists.
Path('../data/xls_files').mkdir(parents=True, exist_ok=True)

# NOTE(review): writing the legacy .xls format relies on the xlwt writer, which
# newer pandas releases no longer ship - confirm the installed pandas supports it.
for e in range(number_of_files):
    filename = f'../data/xls_files/fake_xlsfile_{e}.xls'
    # Loop-local names (n_rows, xls_table) are used on purpose: rebinding the
    # notebook-level N / rand_table here would corrupt earlier cells on re-run.
    n_rows = np.random.randint(25, 1000)  # random number of rows, 25..999
    xls_table = pd.DataFrame({
        'Var1': np.random.random(n_rows),
        'Var2': np.random.choice(['in progress', 'completed'], n_rows),
        'Var3': np.random.choice([True, False], n_rows),
    })
    xls_table.to_excel(filename)


__xlsx__

***

# Image download from Google

Uses the `google_images_download` package: https://github.com/hardikvasa/google-images-download

In [50]:
# Settings handed to the image downloader: search term, maximum number of
# images, minimum size filter, URL logging, target folder and file format.
arguments = dict(
    keywords="beiersdorf",
    limit=15,
    size='>800*600',
    print_urls=True,
    output_directory='../data/images',
    format='png',
)

In [51]:
# Import the google_images_download module from the package of the same name,
# then instantiate its downloader class; `response` is the downloader object
# whose `.download(...)` method is called later in the notebook.
from google_images_download import google_images_download
response = google_images_download.googleimagesdownload()

In [53]:
# Run the download configured in `arguments`; progress is printed per image.
# NOTE(review): `paths` presumably holds the locations of the saved files -
# confirm the exact return shape against the library documentation.
paths = response.download(arguments)


Item no.: 1 --> Item name = beiersdorf
Evaluating...
Starting Download...
Image URL: https://www.beiersdorf.de/~/media/Beiersdorf/brands/overview/Product-Range-Large-Stage-Teaser.png?w=1280&h=450&ch=c&crop=1
Completed Image ====> 1.Product-Range-Large-Stage-Teaser.png
Image URL: https://www.beiersdorf.de/~/media/Beiersdorf/home/teasers/Product-Range-Small-teaser-home-padding.png?mh=600&highRes=1
Completed Image ====> 2.Product-Range-Small-teaser-home-padding.png
Image URL: https://www.beiersdorf.de/~/media/Beiersdorf/newsroom/press-releases/2019/2019-09-09-beiersdorf-launches-skincare-brand-for-tattooed-skin/Beiersdorf-skin-stories-logo-teaser-2019.png
Completed Image ====> 3.Beiersdorf-skin-stories-logo-teaser-2019.png
Image URL: https://www.beiersdorf.de/~/media/Beiersdorf/local/de/about-us/Beiersdorf-Weltkarte-Beiersdorf.png
Completed Image ====> 4.Beiersdorf-Weltkarte-Beiersdorf.png
Image URL: https://www.beiersdorf.de/~/media/Beiersdorf/sustainability/stakeholder-engagement/Mitar