In [1]:
import mysql.connector
import pandas as pd

from credentials import username, password

## Структура базы

![](database_structure.png)

Подключение и курсор

In [2]:
con = mysql.connector.connect(host="127.0.0.1", port=3307,
                              database="tolstoyphotos",
                              user=username, password=password)
cur = con.cursor()

## Создание таблиц

In [3]:
cur.execute("USE tolstoyphotos")

In [4]:
cur.execute("""
CREATE TABLE Authors (
  idAuthor INT NOT NULL,
  authorName VARCHAR(127),
  PRIMARY KEY (idAuthor));
""")

In [5]:
cur.execute("""
CREATE TABLE Locations (
  idLocation INT NOT NULL,
  locationName VARCHAR(255),
  PRIMARY KEY (idLocation));
""")

In [6]:
cur.execute("""
CREATE TABLE Rubrics (
  idRubric INT NOT NULL,
  rubricName VARCHAR(255),
  PRIMARY KEY (idRubric)
);""")

In [7]:
cur.execute("""
CREATE TABLE PhotoDescriptions (
  idPhoto INT NOT NULL,
  idAuthor INT,
  idLocation INT,
  year INT,
  photoDescription VARCHAR(511),
  FOREIGN KEY (idAuthor) REFERENCES Authors(idAuthor),
  FOREIGN KEY (idLocation) REFERENCES Locations(idLocation),
  PRIMARY KEY (idPhoto)
);""")

In [8]:
cur.execute("""
CREATE TABLE PhotoRubrics (
  idPhoto INT,
  idRubric INT,
  FOREIGN KEY (idPhoto) REFERENCES PhotoDescriptions(idPhoto),
  FOREIGN KEY (idRubric) REFERENCES Rubrics(idRubric)
);""")

In [9]:
cur.execute("""
CREATE TABLE MuseumAnnotations (
  idPhoto INT,
  idIdentif VARCHAR(63),
  idInv VARCHAR(63),
  originalAuthor VARCHAR(127),
  originalLocation VARCHAR(127),
  FOREIGN KEY (idPhoto) REFERENCES PhotoDescriptions(idPhoto)
);""")

In [10]:
cur.execute("""
CREATE TABLE PhotoPhysicalDescriptions (
  idPhoto INT,
  width FLOAT,
  height FLOAT,
  inscription VARCHAR(255),
  FOREIGN KEY (idPhoto) REFERENCES PhotoDescriptions(idPhoto)
);""")

In [11]:
cur.execute("""
CREATE TABLE PhotoFiles (
  idPhoto INT,
  miniatureFile VARCHAR(63),
  fullsizeFile VARCHAR(63),
  FOREIGN KEY (idPhoto) REFERENCES PhotoDescriptions(idPhoto)
);""")

In [12]:
con.commit()

## Добавление информации в таблицы

### Места съёмки 

In [13]:
df_temp = pd.read_csv("db_locations.tsv", sep="\t", encoding="utf-8")
df_temp.head()

Unnamed: 0,idLocation,locationName
0,0,Slovаnha
1,1,Тиктон-хауз. Крайстчерч Англия
2,2,-1
3,3,Gfhb;
4,4,Gunnes (Турция)


In [14]:
for row in df_temp.iterrows():
    id_location = row[1]['idLocation']
    location_name = row[1]['locationName']
    query = f"INSERT INTO Locations (idLocation, locationName) VALUES ({id_location}, '{location_name}');"
    try:
        cur.execute(query)
    except:
        print(query)

INSERT INTO Locations (idLocation, locationName) VALUES (Сальский округ Северо - Кавказского края., 'nan');


In [15]:
con.commit()

### Авторы фотографий

In [16]:
df_temp = pd.read_csv("db_authors.tsv", sep="\t", encoding="utf-8")
df_temp.head()

Unnamed: 0,idAuthor,authorName
0,0,-1
1,1,A.Dahlgren
2,2,A.Wilcke
3,3,ALBERT
4,4,Angerer V.


In [17]:
for row in df_temp.iterrows():
    id_author = row[1]['idAuthor']
    author_name = row[1]['authorName']
    query = f"INSERT INTO Authors (idAuthor, authorName) VALUES ({id_author}, '{author_name}');"
    try:
        cur.execute(query)
    except:
        print(query)

In [18]:
con.commit()

### Описание фотографии

In [19]:
df_temp = pd.read_csv("db_photo_descriptions.tsv", sep="\t", encoding="utf-8")
df_temp.head()

Unnamed: 0,id,AUTHOR_REFINED,GEOGR_REFINED,year,COMPNAM
0,115964,125,319,1908,Фотография для стереоскопа
1,136165,64,319,1903,Увеличенный переснимок
2,136603,202,319,1910,-1
3,106918,206,348,1932,Две могилы в общей чугунной ограде. За решётко...
4,142182,0,2,1930,-1


In [20]:
for row in df_temp.iterrows():
    id_photo = row[1]['id']
    id_author = row[1]['AUTHOR_REFINED']
    id_location = row[1]['GEOGR_REFINED']
    year = int(row[1]['year'])
    descr = str(row[1]['COMPNAM'])
    query = "INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) " + \
    f"VALUES ({id_photo}, {id_author}, {id_location}, {year}, '{descr}');"
    try:
        cur.execute(query)
    except:
        # дубликаты фотографий
        print(query)

INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (142182, 0, 2, 1930, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (142196, 0, 319, 1920, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (141448, 0, 319, 1920, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (140024, 0, 319, 1900, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (92638, 0, 319, 1906, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (109197, 0, 2, 1908, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (142237, 0, 319, 1930, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (109034, 70, 319, 1908, '-1');
INSERT INTO PhotoDes

INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (92762, 0, 319, 1922, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (140156, 200, 319, 1903, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (142033, 0, 319, 1900, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (92777, 216, 128, 1902, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (140143, 0, 319, 1900, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (140157, 0, 319, 1900, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (139331, 222, 319, 1903, 'Профиль влево, в светлой блузе.');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (92561, 0, 31

INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (142242, 0, 2, 1930, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (141978, 0, 319, 1901, 'Стоят  на площадке около крыльца.');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (139169, 222, 319, 1909, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (97292, 241, 351, 1910, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (142120, 0, 319, -1, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (140045, 0, 319, 1902, 'Сидят на диване у круглого стола в зале яснополянского дома');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (106945, 0, 340, 1900, 'На первом плане ограда из горизонтальных в четыре яруса бр

INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (114755, 0, 247, 1900, 'На реке однопалубный пароход с надписью на полукруге в центре «Громовъ»; за кормой (слева) на привязи плывет лодка). На судне наверху в центре на корме и на носу пассажиры разных слоев общества (мужчины; женщины - большинство в шляпках, некоторые под зонтиками; девочки все смотрят в строну объектива). На 2-ом плане пологий пустынный берег. Над изображением автограф Вал.Ф.Булгакова, внизу на фото и картоне этикетка с м/п текстом.');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (142261, 0, 2, 1920, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (141026, 0, 2, 1856, 'Переснимок фотографии С.Левицкого 1856 г.');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (141032, 222, 319, 1899, 'Слева направо: А. Л. Толстой, Н. Н. Ге – сын, А

INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (141425, 0, 319, 1920, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (23249, 241, 2, 1909, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (140739, 0, 2, -1, 'Фото с миниатюры акварелью');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (137758, 222, 319, 1906, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (134292, 222, 128, 1902, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (139167, 222, 319, 1909, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (91955, 0, 2, 1890, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (100719, 222, 319, 1910,

INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (47618, 200, 319, 1908, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (234, 222, 319, 1897, 'Фотография, сделанная для скульптора И.Я. Гинцбурга.');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (114269, 0, 109, 1920, 'На 1-ом плане огромное поле. На 2-ом жилой дом, амбар для хлеба, кирпичный и лесопильный заводы. Слева и справа от строений - фруктовые сады.Вдали пологие горы с редкими елями.');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (142205, 0, 319, 1920, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (137922, 222, 319, 1905, '-1');
INSERT INTO PhotoDescriptions (idPhoto, idAuthor, idLocation, year, photoDescription) VALUES (132696, 222, 319, 1898, '-1');
INSERT INTO PhotoDescriptions (idP

In [21]:
con.commit()

### Музейные пометы

In [22]:
df_temp = pd.read_csv("db_museum_annotations.tsv", sep="\t", encoding="utf-8")
df_temp.head()

Unnamed: 0,id,IDENTIF,INV,AUTHOR,GEOGR
0,115964,ГМТ КП-11486/21,Ф-12746,Кулаков П.Е.,Ясная Поляна
1,136165,ГМТ КП-5954/3,Ф-1125,Бодянский И.А.,Ясная Поляна
2,136603,ГМТ КП-5680/6,Ф-1469,Смирнов,Ясная Поляна
3,106918,ГМТ КП-7448/1,Ф-1597,Соловьев В.С.,с. Никольское Московской губ.
4,142182,ГМТ КП-13123/82,Ф-13637/82,-1,-1


In [23]:
for row in df_temp.iterrows():
    id_photo = row[1]['id']
    identif = row[1]['IDENTIF']
    inv = row[1]['INV']
    author_orig = str(row[1]['AUTHOR'])
    geogr_orig = str(row[1]['GEOGR'])
    query = "INSERT INTO MuseumAnnotations (idPhoto, idIdentif, idInv, originalAuthor, originalLocation) " + \
    f"VALUES ({id_photo}, '{identif}', '{inv}', '{author_orig}', '{geogr_orig}');"
    try:
        cur.execute(query)
    except:
        print(query)

In [24]:
con.commit()

### Физическое описание фотографий

In [25]:
df_temp = pd.read_csv("db_physical_descr.tsv", sep="\t", encoding="utf-8")
df_temp.head()

Unnamed: 0,id,width,height,INSCR
0,115964,7.4,7.1,-1
1,136165,17.8,23.5,-1
2,136603,10.3,16.5,-1
3,106918,10.3,16.7,-1
4,142182,6.0,5.4,-1


In [26]:
for row in df_temp.iterrows():
    id_photo = row[1]['id']
    width = float(row[1]['width'])
    height = float(row[1]['height'])
    inscr = row[1]['INSCR']
    query = "INSERT INTO PhotoPhysicalDescriptions (idPhoto, width, height, inscription) " + \
    f"VALUES ({id_photo}, {width}, {height}, '{inscr}');"
    try:
        cur.execute(query)
    except:
        print(query)

In [27]:
con.commit()

### Файлы

In [28]:
df_temp = pd.read_csv("db_files.tsv", sep="\t", encoding="utf-8")
df_temp.head()

Unnamed: 0,id,miniature_path,fullsize_path
0,115964,./data/miniature/00115964.jpg,./data/fullsize/00115964.jpg
1,136165,./data/miniature/00136165.jpg,./data/fullsize/00136165.jpg
2,136603,./data/miniature/00136603.jpg,./data/fullsize/00136603.jpg
3,106918,./data/miniature/00106918.jpg,./data/fullsize/00106918.jpg
4,142182,./data/miniature/00142182.jpg,./data/fullsize/00142182.jpg


In [29]:
for row in df_temp.iterrows():
    id_photo = row[1]['id']
    mini = row[1]['miniature_path']
    full = row[1]['fullsize_path']

    query = "INSERT INTO PhotoFiles (idPhoto, miniatureFile, fullsizeFile) " + \
    f"VALUES ({id_photo}, '{mini}', '{full}');"
    try:
        cur.execute(query)
    except:
        print(query)

In [30]:
con.commit()

### Рубрики

#### Список рубрик

In [31]:
df_temp = pd.read_csv("db_rubrics.tsv", sep="\t", encoding="utf-8")
df_temp.head()

Unnamed: 0,idRubric,rubricName
0,0,Портреты (в т.ч. групповые)
1,1,1900-е годы (юбилей в 1908г – отдельная рубрика)
2,2,Похороны Л.Н.Толстого в Ясной Поляне
3,3,Могилы и похороны разных лиц
4,4,"Писатели, поэты, литераторы"


In [32]:
for row in df_temp.iterrows():
    id_rubric = row[1]['idRubric']
    rubric_name = row[1]['rubricName']
    query = "INSERT INTO Rubrics (idRubric, rubricName) " + \
    f"VALUES ({id_rubric}, '{rubric_name}');"
    try:
        cur.execute(query)
    except:
        print(query)

In [33]:
con.commit()

#### Связь рубрик и фотографий

In [34]:
df_temp = pd.read_csv("db_rubrics_photos.tsv", sep="\t", encoding="utf-8")
df_temp.head()

Unnamed: 0,idPhoto,idRubric
0,115964,0
1,136165,1
2,136603,2
3,106918,3
4,142182,4


In [35]:
for row in df_temp.iterrows():
    id_photo = row[1]['idPhoto']
    id_rubric = row[1]['idRubric']
    query = "INSERT INTO  PhotoRubrics(idPhoto, idRubric) " + \
    f"VALUES ({id_photo}, {id_rubric});"
    try:
        cur.execute(query)
    except:
        print(query)

In [36]:
con.commit()

## Закрываем соединение

In [37]:
con.close()