## 2次元のデータ

In [3]:
%use lets-plot
%use dataframe

In [9]:
// 家庭内での兄弟と姉妹の身長の組み
val meleAndFemale = listOf("男（兄弟）", "女（姉妹）")
val maleHeights = listOf(71.0, 68.0, 66.0, 67.0, 70.0, 71.0, 70.0, 73.0, 72.0, 65.0, 66.0) // inchi
val femaleHeights = listOf(69.0, 64.0, 65.0, 63.0, 65.0, 62.0, 65.0, 64.0, 66.0, 59.0, 62.0) // inchi

val dataMeleAndFemale = mapOf(
    "male" to maleHeights,
    "female" to femaleHeights
)

val plotMeleAndFemale = letsPlot(dataMeleAndFemale) +
        geomPoint(size = 5.0, color = "blue") {
            x = "male"
            y = "female"
        } +
        ggtitle("家庭内の兄弟・姉妹の身長の組み") +
        xlab("兄弟の身長") +
        ylab("姉妹の身長")

plotMeleAndFemale


In [7]:
// 年齢階級と血圧の平均値
val ageAndBloodPressure = listOf("年齢階級", "血圧の平均")
val ageClass = listOf(35.0, 45.0, 55.0, 65.0, 75.0) // 歳
val bloodPressure = listOf(114.0, 124.0, 143.0, 158.0, 166.0) // mmHg（水銀柱ミリバール）

val dataAgeAndBloodPressure = mapOf(
    "age" to ageClass,
    "bp" to bloodPressure
)

val plotAgeAndBloodPressure = letsPlot(dataAgeAndBloodPressure) +
        geomPoint(size = 5.0, color = "red") {
            x = "age"
            y = "bp"
        } +
        ggtitle("年齢階級と血圧の平均値") +
        xlab("年齢階級（歳）") +
        ylab("血圧の平均 (mmHg)")

plotAgeAndBloodPressure

### 3.2 散布図と分割表

In [None]:
/**
 * eStatからデータを取るための共通処理
 */
@file:DependsOn("org.jetbrains.kotlinx:kotlinx-serialization-json:1.6.3")

import java.net.URI
import java.net.URLEncoder
import java.net.http.HttpClient
import java.net.http.HttpRequest
import java.net.http.HttpResponse
import kotlinx.serialization.json.*

fun httpGet(url: String): String {
    val request = HttpRequest.newBuilder()
        .uri(URI.create(url))
        .GET()
        .build()
    val response = client.send(request, HttpResponse.BodyHandlers.ofString())

    if (response.statusCode() != 200) {
        throw RuntimeException("HTTP Error: ${response.statusCode()} - ${response.body()}")
    }
    return response.body()
}

fun enc(s: String) = URLEncoder.encode(s, "UTF-8")

val prefCodeMap = mapOf(
    "01000" to "北海道", "02000" to "青森県", "03000" to "岩手県", "04000" to "宮城県", "05000" to "秋田県",
    "06000" to "山形県", "07000" to "福島県", "08000" to "茨城県", "09000" to "栃木県", "10000" to "群馬県",
    "11000" to "埼玉県", "12000" to "千葉県", "13000" to "東京都", "14000" to "神奈川県", "15000" to "新潟県",
    "16000" to "富山県", "17000" to "石川県", "18000" to "福井県", "19000" to "山梨県", "20000" to "長野県",
    "21000" to "岐阜県", "22000" to "静岡県", "23000" to "愛知県", "24000" to "三重県", "25000" to "滋賀県",
    "26000" to "京都府", "27000" to "大阪府", "28000" to "兵庫県", "29000" to "奈良県", "30000" to "和歌山県",
    "31000" to "鳥取県", "32000" to "島根県", "33000" to "岡山県", "34000" to "広島県", "35000" to "山口県",
    "36000" to "徳島県", "37000" to "香川県", "38000" to "愛媛県", "39000" to "高知県", "40000" to "福岡県",
    "41000" to "佐賀県", "42000" to "長崎県", "43000" to "熊本県", "44000" to "大分県", "45000" to "宮崎県",
    "46000" to "鹿児島県", "47000" to "沖縄県"
)
val cdArea = prefCodeMap.keys.joinToString(",")

// Auth用
val appId = "3d27beda866e7af56c270a677a96668ee48a5430"



In [7]:
// モダンな書き方
val client = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NORMAL).build()

data class PrefRetail(val prefCode: String, val prefName: String, val retailStores: Int)

val statsRetailDataId = "0004003256" // 経済センサス‐活動調査 令和３年経済センサス‐活動調査 事業所に関する集計 産業別集計 卸売業，小売業に関する集計 産業編（都道府県表）

val urlRetail = "https://api.e-stat.go.jp/rest/3.0/app/json/getStatsData" +
        "?appId=${enc(appId)}" +
        "&statsDataId=${enc(statsRetailDataId)}" +
        "&cdCat01=9&cdCat02=I2&cdTime=2021000000" +
        "&cdArea=${enc(cdArea)}" +
        "&metaGetFlg=N&cntGetFlg=N&explanationGetFlg=N&annotationGetFlg=N&sectionHeaderFlg=1"

// 3. データ取得と安全な抽出
val jsonResponse = httpGet(urlRetail)
val jsonElement = Json.parseToJsonElement(jsonResponse)

// 安全にデータを掘り下げる
val retailValues = jsonElement.jsonObject["GET_STATS_DATA"]
    ?.jsonObject?.get("STATISTICAL_DATA")
    ?.jsonObject?.get("DATA_INF")
    ?.jsonObject?.get("VALUE")
    ?.jsonArray 
    ?: throw RuntimeException("データが見つかりませんでした。APIレスポンスを確認してください。")

// 4. マッピング処理
val retailByPref: List<PrefRetail> = retailValues.mapNotNull { element ->
    val obj = element as? JsonObject ?: return@mapNotNull null
    
    val prefCode = obj["@area"]?.jsonPrimitive?.content
    val rawValue = obj["$"]?.jsonPrimitive?.content ?: obj["@v"]?.jsonPrimitive?.content
    
    if (prefCode != null && rawValue != null) {
        val prefName = prefCodeMap[prefCode] ?: "地域($prefCode)"
        PrefRetail(prefCode, prefName, rawValue.toInt())
    } else {
        null
    }
}

// 結果の表示
retailByPref.take(5).forEach { println(it) }


PrefRetail(prefCode=01000, prefName=北海道, retailStores=36771)
PrefRetail(prefCode=02000, prefName=青森県, retailStores=10744)
PrefRetail(prefCode=03000, prefName=岩手県, retailStores=10512)
PrefRetail(prefCode=04000, prefName=宮城県, retailStores=16838)
PrefRetail(prefCode=05000, prefName=秋田県, retailStores=8858)


In [13]:
/**
 * 県別人口
 * http://api.e-stat.go.jp/rest/3.0/app/json/getStatsData
 *  ?cdArea=01000%2C02000%2C03000%2C04000%2C05000%2C06000%2C07000%2C08000%2C09000%2C10000%2C11000%2C12000%2C13000%2C14000%2C15000%2C16000%2C17000%2C18000%2C19000%2C20000%2C21000%2C22000%2C23000%2C24000%2C25000%2C26000%2C27000%2C28000%2C29000%2C30000%2C31000%2C32000%2C33000%2C34000%2C35000%2C36000%2C37000%2C38000%2C39000%2C40000%2C41000%2C42000%2C43000%2C44000%2C45000%2C46000%2C47000
 *  &appId=
 *  &lang=J
 *  &statsDataId=0003448231
 *  &metaGetFlg=Y&cntGetFlg=N&explanationGetFlg=Y&annotationGetFlg=Y&sectionHeaderFlg=1&replaceSpChars=0
 */

val populationClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NORMAL).build()

data class PrefPopulation(val prefCode: String, val prefName: String, val population: Int)

val statPoplationId = "0003448231"
val cdCat01 = "000"   // 男女別：総数
val cdCat02 = "01000" // 人口：総人口

// URL構築（cdCatパラメータを修正）
val urlPopulation = "https://api.e-stat.go.jp/rest/3.0/app/json/getStatsData" +
        "?appId=${enc(appId)}" +
        "&statsDataId=${enc(statsDataId)}" +
        "&cdCat01=${enc(cdCat01)}" +  // 修正した値をセット
        "&cdCat02=${enc(cdCat02)}" +  // 修正した値をセット
        "&cdTime=2021000000" +        // 時点は必要に応じて確認
        "&cdArea=${enc(cdArea)}" +
        "&metaGetFlg=N&cntGetFlg=N&sectionHeaderFlg=1"

// 3. データ取得と安全な抽出
val jsonPopulationResponse = httpGet(urlPopulation)
val jsonPopulationElement = Json.parseToJsonElement(jsonPopulationResponse)
println(jsonPopulationElement)

// データ部分を安全に取得（DATA_INFがない場合はnullになり、エルビス演算子で空リストを返す）
val poplulationValues = jsonElement.jsonObject["GET_STATS_DATA"]
     ?.jsonObject?.get("STATISTICAL_DATA")
     ?.jsonObject?.get("DATA_INF")
     ?.jsonObject?.get("VALUE")
     ?.jsonArray
     ?: throw RuntimeException("人口データが見つかりませんでした。")

val populationByPref: List<PrefPopulation> = poplulationValues.mapNotNull { element ->
    val obj = element as? JsonObject ?: return@mapNotNull null

    val prefCode = obj["@area"]?.jsonPrimitive?.content
    val rawValue = obj["$"]?.jsonPrimitive?.content ?: obj["@v"]?.jsonPrimitive?.content ?: return@mapNotNull null

    val popValue = rawValue.toIntOrNull() ?: 0
    val prefName = prefCodeMap[prefCode] ?: "地域($prefCode)"
    PrefPopulation(prefCode, prefName, popValue)
}

println("取得件数: ${populationByPref.size}")
populationByPref.take(5).forEach { println(it) }


org.jetbrains.kotlinx.jupyter.exceptions.ReplCompilerException: at Cell In[13], line 50, column 20: Type mismatch: inferred type is String? but String was expected

決定係数の導出。

$$
\begin{align}
    \sum d_i^2
    &= \sum (y_i - \hat{y}_i)^2 \\[8pt]
    &= \sum (y_i - b x_i - a)^2 \\[8pt]
    &= \sum \left(
        y_i - r\frac{S_y}{S_x} x_i - \bar{y} + r\frac{S_y}{S_x}\bar{x}
    \right)^2 \\[8pt]
    &= \sum(y_i - \bar{y})^h
    - 2 r\frac{S_y}{S_x} \sum (y_i - \bar{y}) (x_i - \bar{x})
    + r^2 \frac{S_y^2}{S_x^2} \sum (x_i - \bar{x})^2 \\[8pt]
    &= \sum(y_i - \bar{y})^2
    - 2 n r\frac{S_y}{S_x} C_{xy}
    + n r^2 \frac{S_y^2}{S_x^2} S_x^2 \\[8pt]
    &= \sum(y_i - \bar{y})^2
    - 2 n r S_y^2 \frac{C_{xy}}{S_x S_y}
    + n r^2 S_y^2 \\[8pt]
    &= \sum(y_i - \bar{y})^2
    - 2 n r^2 S_y^2
    + n r^2 S_y^2 \\[8pt]
    &= \sum(y_i - \bar{y})^2
    - n r^2 S_y^2 \\[8pt]
    &= \sum(y_i - \bar{y})^2 - r^2 \sum(y_i - \bar{y})^2 \\[8pt]
    &= (1 - r^2) \sum(y_i - \bar{y})^2
\end{align}
$$