In [2]:
import numpy as np
import netCDF4 as nc
import pandas as pd
import numpy.ma as ma
import matplotlib as mpl
import matplotlib.pyplot as pltz
import xarray as xar
import rioxarray
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import Mapped
from sqlalchemy import ForeignKey, Sequence, MetaData
from sqlalchemy import select, update
from sqlalchemy.orm import mapped_column, DeclarativeBase
from datetime import datetime

In [33]:
# PacificOcean | AtlanticOcean | IndianOceanNew | NorthSea
full_path=r'./data/IndianOceanNew.csv'

In [34]:
with open(full_path,'rb') as f:
    data=pd.read_csv(f,encoding='utf-8')
data.head()

Unnamed: 0,code,location,city,country,lat,lon
0,rodr,Rodrigues,罗德里格斯岛,Mauritius,-19.68024,63.42119
1,ptlu,Pt.Louis,路易斯港,Mauritius,-20.15716,57.5043
2,blueb,Blue Bay,蓝湾,Mauritius,-20.444133,57.71095
3,reun2,Pointe des Galets (Reunion Island),加莱角,France,-20.92,55.28
4,toam2,Toamasina 2,图阿马西纳,Madagascar,-18.1536,49.4281


In [35]:
data.columns

Index(['code', 'location', 'city', 'country', 'lat', 'lon'], dtype='object')

In [36]:
# 数据库的配置，配置借鉴自 django 的 settings 的结构
DATABASES = {
    'default': {
        'ENGINE': 'mysqldb',  # 数据库引擎
        'NAME': 'surge_global_sys',  # 数据库名
        'USER': 'root',  # 账号
        'PASSWORD': '123456',
        'HOST': '127.0.0.1',  # HOST
        'POST': 3306,  # 端口
        'OPTIONS': {
            "init_command": "SET foreign_key_checks = 0;",
        },
    },
}

### 尝试将行政区域信息提取并写入 tb:region_info


In [37]:
DEFAULT_COUNTRY_INDEX = -1

In [38]:
class DbFactory:
    """
        数据库工厂
    """

    def __init__(self, db_mapping: str = 'default', engine_str: str = None, host: str = None, port: str = None,
                 db_name: str = None,
                 user: str = None,
                 pwd: str = None):
        """
            mysql 数据库 构造函数
        :param db_mapping:
        :param engine_str:
        :param host:
        :param port:
        :param db_name:
        :param user:
        :param pwd:
        """
        db_options = DATABASES.get(db_mapping)
        self.engine_str = engine_str if engine_str else db_options.get(
            'ENGINE')
        self.host = host if host else db_options.get('HOST')
        self.port = port if port else db_options.get('POST')
        self.db_name = db_name if db_name else db_options.get('NAME')
        self.user = user if user else db_options.get('USER')
        self.password = pwd if pwd else db_options.get('PASSWORD')
        # TypeError: Invalid argument(s) 'encoding' sent to create_engine(), using configuration MySQLDialect_mysqldb/QueuePool/Engine.  Please check that the keyword arguments are appropriate for this combination of components.
        self.engine = create_engine(
            f"mysql+{self.engine_str}://{self.user}:{self.password}@{self.host}:{self.port}/{self.db_name}?charset=utf8",
            pool_pre_ping=True, future=True)
        self._session_def = sessionmaker(bind=self.engine)

    @property
    def Session(self) -> sessionmaker:
        if self._session_def is None:
            self._session_def = sessionmaker(bind=self.engine)
        return self._session_def()


In [39]:
class BaseMeta(DeclarativeBase):
    pass


class IIdModel(BaseMeta):
    __abstract__ = True
    id: Mapped[int] = mapped_column(primary_key=True)


class IDel(BaseMeta):
    """
        软删除 抽象父类
    """
    __abstract__ = True
    is_del: Mapped[int] = mapped_column(nullable=False, default=0)


class IModel(BaseMeta):
    """
        model 抽象父类，主要包含 创建及修改时间
    """
    __abstract__ = True
    gmt_create_time: Mapped[datetime] = mapped_column(default=datetime.utcnow)
    gmt_modify_time: Mapped[datetime] = mapped_column(default=datetime.utcnow)


In [40]:
class RegionInfo(IIdModel, IDel, IModel):
    """
        行政区划表
    """
    val_en: Mapped[str] = mapped_column()
    val_ch: Mapped[str] = mapped_column()
    pid: Mapped[int] = mapped_column(default=-1)
    # location: Mapped[str] = mapped_column()
    # city: Mapped[str] = mapped_column()
    # city_name_ch: Mapped[str] = mapped_column()
    # country: Mapped[int] = mapped_column(nullable=False, default=DEFAULT_COUNTRY_INDEX)
    __tablename__ = 'region_info'

In [41]:
class StationInfo(IIdModel, IDel, IModel):
    station_name: Mapped[str] = mapped_column(default='DEFAULT')
    station_code: Mapped[str] = mapped_column(default='DEFAULT')
    lat: Mapped[float] = mapped_column(nullable=True)
    lon: Mapped[float] = mapped_column(nullable=True)
    desc: Mapped[str] = mapped_column()
    is_abs: Mapped[int] = mapped_column(nullable=False, default=0)
    # 所属父级 id
    pid: Mapped[int] = mapped_column(default=0)
    is_in_common_use: Mapped[int] = mapped_column(nullable=False, default=0)
    sort: Mapped[int] = mapped_column(nullable=False, default=0)
    # 归属的行政区划id tb: RegionInfo
    rid: Mapped[int] = mapped_column(default=0)
    __tablename__ = 'station_info'


In [42]:
data.iloc[0]

code             rodr
location    Rodrigues
city           罗德里格斯岛
country     Mauritius
lat          -19.6802
lon           63.4212
Name: 0, dtype: object

In [43]:
set(data['country'].tolist())

{'Bangladesh',
 'France',
 'India',
 'Indonesia',
 'Kenya',
 'Madagascar',
 'Malaysia',
 'Maldive Islands',
 'Mauritius',
 'Myanmar',
 'Oman',
 'Pakistan',
 'Seychelles',
 'Sri Lanka',
 'Tanzania',
 'UK'}

In [44]:
session = DbFactory().Session


### step1:录入对应的国家


In [45]:
try:
    for country in set(data['country'].tolist()):
        region_model = RegionInfo(val_en=country, val_ch='')
        session.add(region_model)
    session.commit()

except Exception as ex:
    print(ex.args)
finally:

    session.close()


### step2:根据国家字典录入对应不同的城市


In [46]:
# dict_country = {
#     'SouthernKorea': 1,
#     'PuertoRico': 2,
#     'FijiIslands': 3,
#     'France': 4,
#     'SolomonIslands': 5,
#     'USASamoa': 6,
#     'Nicaragua': 7,
#     'Australia': 8,
#     'USA': 9,
#     'Japan': 10,
#     'Mexico': 11,
#     'NewZealand': 12,
#     'IndependentStateofWesternSamoa': 13,
#     'Philippines': 14,
#     'VietNam': 15,
#     'CostaRica': 16,
#     'PapuaNewGuinea': 17,
#     'Vanuatu': 18,
#     'ElSalvador': 19,

# }

# pa
# dict_country = {
# 'Saint Lucia':344,
# 'France':345,
# 'Dominican Republic':346,
# 'Jamaica':347,
# 'Grenada':348,
# 'Trinidad & Tobago':349,
# 'Bahamas/Bahama Islands':350,
# 'Aruba; Nederland':351,
# 'Haïti':352,
# 'Saint Vincent & Grenadines':353,
# 'Colombia':354,
# 'Nicaragua':355,
# 'Guatemala':356,
# 'Curaçao':357,
# 'Honduras':358,
# 'Panamá':359,
# 'St. Kitts & Nevis':360,
# 'Dominica island':361,
# 'Antigua, Barbuda & Redonda':362,
# 'Costa Rica':363,
# 'Anguilla':364,
# 'Belize':365,
# 'Martinique':366,

# }

# 北海
# dict_country = {
#     'Norway':409,
#     'Belgium':410,
#     'Germany':411,
#     'Sweden':412,
#     'Netherlands':413,
#     'UK':414,
#     'France':345,

# }

# 印度洋
dict_country = {
    'Oman':480,
    'Tanzania':481,
    'Kenya':482,
    'Myanmar':483,
    'Malaysia':484,
    'UK':414,
    'India':486,
    'Seychelles':487,
    'Maldive Islands':488,
    'France':345,
    'Indonesia':490,
    'Sri Lanka':491,
    'Madagascar':492,
    'Pakistan':493,
    'Mauritius':494,
    'Bangladesh':495,
    
}

In [47]:
num_rows=data.shape[0]
num_rows

60

更新表 `RegionInfo` 中的城市

In [49]:

def check_stand(country, cid, city, location):
    if pd.notna(country) and pd.notna(cid) and pd.notna(city) and pd.notna(location):
        return True
    else:
        return False


for index in range(num_rows-1):
    series_column = data.iloc[index]
    print(series_column)

    country_str: str = series_column['country']
    # 从字典中找到对应的id
    cid: int = dict_country.get(country_str, -1)
    location_str: str = series_column['location']
    city_str: str = series_column['city']
    if check_stand(country_str, cid, city_str, location_str):
        region_model = RegionInfo(
            val_en=location_str, val_ch=city_str, pid=cid)
        session.add(region_model)
# UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-2: character maps to <undefined>
try:
    session.commit()
except Exception as ex:
    session.rollback()
    print(ex.args)
finally:
    session.close()


code             rodr
location    Rodrigues
city           罗德里格斯岛
country     Mauritius
lat          -19.6802
lon           63.4212
Name: 0, dtype: object
code             ptlu
location     Pt.Louis
city             路易斯港
country     Mauritius
lat          -20.1572
lon           57.5043
Name: 1, dtype: object
code            blueb
location     Blue Bay
city               蓝湾
country     Mauritius
lat          -20.4441
lon           57.7109
Name: 2, dtype: object
code                                     reun2
location    Pointe des Galets (Reunion Island)
city                                       加莱角
country                                 France
lat                                     -20.92
lon                                      55.28
Name: 3, dtype: object
code              toam2
location    Toamasina 2
city              图阿马西纳
country      Madagascar
lat            -18.1536
lon             49.4281
Name: 4, dtype: object
code                     dzao2
location    Dzaoudzi (Mayotte)
c

In [50]:
# step3:录入station信息
def check_stand_station(code, location, lat, lon):
    if pd.notna(code) and pd.notna(location) and pd.notna(lat) and pd.notna(lon):
        return True
    else:
        return False


def get_rid(location: str) -> int:
    """
        根据 location 从 region_info 表中获取对应的rid
    """
    query = select(RegionInfo).where(RegionInfo.val_en == location)
    filter = session.scalars(query).fetchall()
    rid: int = -1
    if len(filter) > 0:
        # 取出第一个
        find_one = filter[0]
        rid = find_one.id
        pass
    return rid
    pass


for index in range(num_rows-1):
    series_column = data.iloc[index]
    print(series_column)
    code: str = series_column['code']
    location_str: str = series_column['location']
    lat: float = series_column['lat']
    lon: float = series_column['lon']
    if check_stand_station(code, location_str, lat, lon):
        rid: int = get_rid(location_str)
        station_model = StationInfo(
            station_code=code, lat=lat, lon=lon, rid=rid)
        session.add(station_model)
# UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-2: character maps to <undefined>
try:
    session.commit()
except Exception as ex:
    session.rollback()
    print(ex.args)
finally:
    session.close()


code             rodr
location    Rodrigues
city           罗德里格斯岛
country     Mauritius
lat          -19.6802
lon           63.4212
Name: 0, dtype: object
code             ptlu
location     Pt.Louis
city             路易斯港
country     Mauritius
lat          -20.1572
lon           57.5043
Name: 1, dtype: object
code            blueb
location     Blue Bay
city               蓝湾
country     Mauritius
lat          -20.4441
lon           57.7109
Name: 2, dtype: object
code                                     reun2
location    Pointe des Galets (Reunion Island)
city                                       加莱角
country                                 France
lat                                     -20.92
lon                                      55.28
Name: 3, dtype: object
code              toam2
location    Toamasina 2
city              图阿马西纳
country      Madagascar
lat            -18.1536
lon             49.4281
Name: 4, dtype: object
code                     dzao2
location    Dzaoudzi (Mayotte)
c